diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-11-06 15:42:44 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-11-15 10:56:14 +0100 |
commit | e0b4edbdcc3558d3f38af8398f995c2e9f019f07 (patch) | |
tree | ea91a5fcfb9b6a66a8c0b74cf68ff8d450ce17e0 /freebsd/sys | |
parent | Disable or make static kern_* functions (diff) | |
download | rtems-libbsd-e0b4edbdcc3558d3f38af8398f995c2e9f019f07.tar.bz2 |
Update to FreeBSD head 2018-11-15
Git mirror commit a18b0830c4be01b39489a891b63d6023ada6358a.
Update #3472.
Diffstat (limited to 'freebsd/sys')
71 files changed, 1610 insertions, 1001 deletions
diff --git a/freebsd/sys/crypto/rijndael/rijndael-api-fst.c b/freebsd/sys/crypto/rijndael/rijndael-api-fst.c index 1ae535a5..30816c80 100644 --- a/freebsd/sys/crypto/rijndael/rijndael-api-fst.c +++ b/freebsd/sys/crypto/rijndael/rijndael-api-fst.c @@ -38,7 +38,6 @@ typedef u_int8_t BYTE; int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, const char *keyMaterial) { - u_int8_t cipherKey[RIJNDAEL_MAXKB]; if (key == NULL) { return BAD_KEY_INSTANCE; @@ -61,13 +60,12 @@ int rijndael_makeKey(keyInstance *key, BYTE direction, int keyLen, } /* initialize key schedule: */ - memcpy(cipherKey, key->keyMaterial, keyLen/8); if (direction == DIR_ENCRYPT) { - key->Nr = rijndaelKeySetupEnc(key->rk, cipherKey, keyLen); + key->Nr = rijndaelKeySetupEnc(key->rk, key->keyMaterial, keyLen); } else { - key->Nr = rijndaelKeySetupDec(key->rk, cipherKey, keyLen); + key->Nr = rijndaelKeySetupDec(key->rk, key->keyMaterial, keyLen); } - rijndaelKeySetupEnc(key->ek, cipherKey, keyLen); + rijndaelKeySetupEnc(key->ek, key->keyMaterial, keyLen); return TRUE; } @@ -188,6 +186,7 @@ int rijndael_blockEncrypt(cipherInstance *cipher, keyInstance *key, return BAD_CIPHER_STATE; } + explicit_bzero(block, sizeof(block)); return 128*numBlocks; } @@ -260,6 +259,7 @@ int rijndael_padEncrypt(cipherInstance *cipher, keyInstance *key, return BAD_CIPHER_STATE; } + explicit_bzero(block, sizeof(block)); return 16*(numBlocks + 1); } @@ -359,12 +359,13 @@ int rijndael_blockDecrypt(cipherInstance *cipher, keyInstance *key, return BAD_CIPHER_STATE; } + explicit_bzero(block, sizeof(block)); return 128*numBlocks; } int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, const BYTE *input, int inputOctets, BYTE *outBuffer) { - int i, numBlocks, padLen; + int i, numBlocks, padLen, rval; u_int8_t block[16]; u_int32_t iv[4]; @@ -394,11 +395,13 @@ int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, rijndaelDecrypt(key->rk, key->Nr, input, block); padLen = block[15]; if (padLen >= 16) { - return BAD_DATA; + rval = BAD_DATA; + goto out; } for (i = 16 - padLen; i < 16; i++) { if (block[i] != padLen) { - return BAD_DATA; + rval = BAD_DATA; + goto out; } } memcpy(outBuffer, block, 16 - padLen); @@ -426,11 +429,13 @@ int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, ((u_int32_t*)block)[3] ^= iv[3]; padLen = block[15]; if (padLen <= 0 || padLen > 16) { - return BAD_DATA; + rval = BAD_DATA; + goto out; } for (i = 16 - padLen; i < 16; i++) { if (block[i] != padLen) { - return BAD_DATA; + rval = BAD_DATA; + goto out; } } memcpy(outBuffer, block, 16 - padLen); @@ -440,5 +445,9 @@ int rijndael_padDecrypt(cipherInstance *cipher, keyInstance *key, return BAD_CIPHER_STATE; } - return 16*numBlocks - padLen; + rval = 16*numBlocks - padLen; + +out: + explicit_bzero(block, sizeof(block)); + return rval; } diff --git a/freebsd/sys/dev/e1000/em_txrx.c b/freebsd/sys/dev/e1000/em_txrx.c index 3ceada31..c2b60743 100644 --- a/freebsd/sys/dev/e1000/em_txrx.c +++ b/freebsd/sys/dev/e1000/em_txrx.c @@ -460,7 +460,13 @@ em_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) ntxd = scctx->isc_ntxd[0]; do { delta = (int32_t)cur - (int32_t)prev; + /* + * XXX This appears to be a hack for first-packet. + * A correct fix would prevent prev == cur in the first place. + */ MPASS(prev == 0 || delta != 0); + if (prev == 0 && cur == 0) + delta += 1; if (delta < 0) delta += ntxd; DPRINTF(iflib_get_dev(adapter->ctx), diff --git a/freebsd/sys/dev/e1000/igb_txrx.c b/freebsd/sys/dev/e1000/igb_txrx.c index 32bab4bf..c54315f0 100644 --- a/freebsd/sys/dev/e1000/igb_txrx.c +++ b/freebsd/sys/dev/e1000/igb_txrx.c @@ -335,7 +335,13 @@ igb_isc_txd_credits_update(void *arg, uint16_t txqid, bool clear) ntxd = scctx->isc_ntxd[0]; do { delta = (int32_t)cur - (int32_t)prev; + /* + * XXX This appears to be a hack for first-packet. + * A correct fix would prevent prev == cur in the first place. + */ MPASS(prev == 0 || delta != 0); + if (prev == 0 && cur == 0) + delta += 1; if (delta < 0) delta += ntxd; diff --git a/freebsd/sys/dev/evdev/cdev.c b/freebsd/sys/dev/evdev/cdev.c index 5ae14fed..a9370e7e 100644 --- a/freebsd/sys/dev/evdev/cdev.c +++ b/freebsd/sys/dev/evdev/cdev.c @@ -351,6 +351,19 @@ evdev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag, if (client->ec_revoked || evdev == NULL) return (ENODEV); + /* + * Fix evdev state corrupted with discarding of kdb events. + * EVIOCGKEY and EVIOCGLED ioctls can suffer from this. + */ + if (evdev->ev_kdb_active) { + EVDEV_LOCK(evdev); + if (evdev->ev_kdb_active) { + evdev->ev_kdb_active = false; + evdev_restore_after_kdb(evdev); + } + EVDEV_UNLOCK(evdev); + } + /* file I/O ioctl handling */ switch (cmd) { case FIOSETOWN: diff --git a/freebsd/sys/dev/evdev/evdev.c b/freebsd/sys/dev/evdev/evdev.c index a355ec50..63e651a2 100644 --- a/freebsd/sys/dev/evdev/evdev.c +++ b/freebsd/sys/dev/evdev/evdev.c @@ -34,9 +34,11 @@ #include <sys/param.h> #include <sys/bitstring.h> #include <sys/conf.h> +#include <sys/kdb.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/module.h> +#include <sys/proc.h> #include <sys/sysctl.h> #include <sys/systm.h> @@ -769,6 +771,30 @@ evdev_send_event(struct evdev_dev *evdev, uint16_t type, uint16_t code, } } +void +evdev_restore_after_kdb(struct evdev_dev *evdev) +{ + int code; + + EVDEV_LOCK_ASSERT(evdev); + + /* Report postponed leds */ + for (code = 0; code < LED_CNT; code++) + if (bit_test(evdev->ev_kdb_led_states, code)) + evdev_send_event(evdev, EV_LED, code, + !bit_test(evdev->ev_led_states, code)); + bit_nclear(evdev->ev_kdb_led_states, 0, LED_MAX); + + /* Release stuck keys (CTRL + ALT + ESC) */ + evdev_stop_repeat(evdev); + for (code = 0; code < KEY_CNT; code++) { + if (bit_test(evdev->ev_key_states, code)) { + evdev_send_event(evdev, EV_KEY, code, KEY_EVENT_UP); + evdev_send_event(evdev, EV_SYN, SYN_REPORT, 1); + } + } +} + int evdev_push_event(struct evdev_dev *evdev, uint16_t type, uint16_t code, int32_t value) @@ -777,8 +803,26 @@ evdev_push_event(struct evdev_dev *evdev, uint16_t type, uint16_t code, if (evdev_check_event(evdev, type, code, value) != 0) return (EINVAL); + /* + * Discard all but LEDs kdb events as unrelated to userspace. + * Aggregate LED updates and postpone reporting until kdb deactivation. + */ + if (kdb_active || SCHEDULER_STOPPED()) { + evdev->ev_kdb_active = true; + if (type == EV_LED) + bit_set(evdev->ev_kdb_led_states, + bit_test(evdev->ev_led_states, code) != value); + return (0); + } + EVDEV_ENTER(evdev); + /* Fix evdev state corrupted with discarding of kdb events */ + if (evdev->ev_kdb_active) { + evdev->ev_kdb_active = false; + evdev_restore_after_kdb(evdev); + } + evdev_modify_event(evdev, type, code, &value); if (type == EV_SYN && code == SYN_REPORT && bit_test(evdev->ev_flags, EVDEV_FLAG_MT_AUTOREL)) diff --git a/freebsd/sys/dev/evdev/evdev_private.h b/freebsd/sys/dev/evdev/evdev_private.h index 05206a9d..71bdecaa 100644 --- a/freebsd/sys/dev/evdev/evdev_private.h +++ b/freebsd/sys/dev/evdev/evdev_private.h @@ -117,6 +117,10 @@ struct evdev_dev bitstr_t bit_decl(ev_sw_states, SW_CNT); bool ev_report_opened; + /* KDB state: */ + bool ev_kdb_active; + bitstr_t bit_decl(ev_kdb_led_states, LED_CNT); + /* Multitouch protocol type B state: */ struct evdev_mt * ev_mt; @@ -132,9 +136,14 @@ struct evdev_dev LIST_HEAD(, evdev_client) ev_clients; }; +#define SYSTEM_CONSOLE_LOCK &Giant + #define EVDEV_LOCK(evdev) mtx_lock((evdev)->ev_lock) #define EVDEV_UNLOCK(evdev) mtx_unlock((evdev)->ev_lock) -#define EVDEV_LOCK_ASSERT(evdev) mtx_assert((evdev)->ev_lock, MA_OWNED) +#define EVDEV_LOCK_ASSERT(evdev) do { \ + if ((evdev)->ev_lock != SYSTEM_CONSOLE_LOCK) \ + mtx_assert((evdev)->ev_lock, MA_OWNED); \ +} while (0) #define EVDEV_ENTER(evdev) do { \ if ((evdev)->ev_lock_type == EV_LOCK_INTERNAL) \ EVDEV_LOCK(evdev); \ @@ -185,6 +194,7 @@ int evdev_cdev_destroy(struct evdev_dev *); bool evdev_event_supported(struct evdev_dev *, uint16_t); void evdev_set_abs_bit(struct evdev_dev *, uint16_t); void evdev_set_absinfo(struct evdev_dev *, uint16_t, struct input_absinfo *); +void evdev_restore_after_kdb(struct evdev_dev *); /* Client interface: */ int evdev_register_client(struct evdev_dev *, struct evdev_client *); diff --git a/freebsd/sys/dev/usb/controller/dwc_otg.c b/freebsd/sys/dev/usb/controller/dwc_otg.c index 1332b485..005c7ece 100644 --- a/freebsd/sys/dev/usb/controller/dwc_otg.c +++ b/freebsd/sys/dev/usb/controller/dwc_otg.c @@ -1434,6 +1434,19 @@ dwc_otg_host_data_rx(struct dwc_otg_softc *sc, struct dwc_otg_td *td) goto receive_pkt; } } else if (td->ep_type == UE_ISOCHRONOUS) { + if (td->hcsplt != 0) { + /* + * Sometimes the complete + * split packet may be queued + * too early and the + * transaction translator will + * return a NAK. Ignore + * this message and retry the + * complete split instead. + */ + DPRINTF("Retrying complete split\n"); + goto receive_pkt; + } goto complete; } td->did_nak = 1; diff --git a/freebsd/sys/dev/usb/input/uhid.c b/freebsd/sys/dev/usb/input/uhid.c index 90761ab7..8570f676 100644 --- a/freebsd/sys/dev/usb/input/uhid.c +++ b/freebsd/sys/dev/usb/input/uhid.c @@ -677,6 +677,8 @@ uhid_probe(device_t dev) { struct usb_attach_arg *uaa = device_get_ivars(dev); int error; + void *buf; + uint16_t len; DPRINTFN(11, "\n"); @@ -703,6 +705,25 @@ uhid_probe(device_t dev) !usb_test_quirk(uaa, UQ_UMS_IGNORE)))) return (ENXIO); + /* Check for mandatory multitouch usages to give wmt(4) a chance */ + if (!usb_test_quirk(uaa, UQ_WMT_IGNORE)) { + error = usbd_req_get_hid_desc(uaa->device, NULL, + &buf, &len, M_USBDEV, uaa->info.bIfaceIndex); + /* Let HID decscriptor-less devices to be handled at attach */ + if (!error) { + if (hid_locate(buf, len, + HID_USAGE2(HUP_DIGITIZERS, HUD_CONTACT_MAX), + hid_feature, 0, NULL, NULL, NULL) && + hid_locate(buf, len, + HID_USAGE2(HUP_DIGITIZERS, HUD_CONTACTID), + hid_input, 0, NULL, NULL, NULL)) { + free(buf, M_USBDEV); + return (ENXIO); + } + free(buf, M_USBDEV); + } + } + return (BUS_PROBE_GENERIC); } diff --git a/freebsd/sys/dev/usb/input/ukbd.c b/freebsd/sys/dev/usb/input/ukbd.c index 8fb450bc..770d1d3f 100644 --- a/freebsd/sys/dev/usb/input/ukbd.c +++ b/freebsd/sys/dev/usb/input/ukbd.c @@ -1363,7 +1363,7 @@ ukbd_attach(device_t dev) if (sc->sc_flags & UKBD_FLAG_SCROLLLOCK) evdev_support_led(evdev, LED_SCROLLL); - if (evdev_register(evdev)) + if (evdev_register_mtx(evdev, &Giant)) evdev_free(evdev); else sc->sc_evdev = evdev; diff --git a/freebsd/sys/fs/devfs/devfs_vnops.c b/freebsd/sys/fs/devfs/devfs_vnops.c index dfa8413b..176664db 100644 --- a/freebsd/sys/fs/devfs/devfs_vnops.c +++ b/freebsd/sys/fs/devfs/devfs_vnops.c @@ -782,6 +782,29 @@ devfs_ioctl_f(struct file *fp, u_long com, void *data, struct ucred *cred, struc return (error); } +void * +fiodgname_buf_get_ptr(void *fgnp, u_long com) +{ + union { + struct fiodgname_arg fgn; +#ifdef COMPAT_FREEBSD32 + struct fiodgname_arg32 fgn32; +#endif + } *fgnup; + + fgnup = fgnp; + switch (com) { + case FIODGNAME: + return (fgnup->fgn.buf); +#ifdef COMPAT_FREEBSD32 + case FIODGNAME_32: + return ((void *)(uintptr_t)fgnup->fgn32.buf); +#endif + default: + panic("Unhandled ioctl command %ld", com); + } +} + static int devfs_ioctl(struct vop_ioctl_args *ap) { @@ -804,24 +827,27 @@ devfs_ioctl(struct vop_ioctl_args *ap) KASSERT(dev->si_refcount > 0, ("devfs: un-referenced struct cdev *(%s)", devtoname(dev))); - if (com == FIODTYPE) { + switch (com) { + case FIODTYPE: *(int *)ap->a_data = dsw->d_flags & D_TYPEMASK; error = 0; - goto out; - } else if (com == FIODGNAME) { + break; + case FIODGNAME: +#ifdef COMPAT_FREEBSD32 + case FIODGNAME_32: +#endif fgn = ap->a_data; p = devtoname(dev); i = strlen(p) + 1; if (i > fgn->len) error = EINVAL; else - error = copyout(p, fgn->buf, i); - goto out; + error = copyout(p, fiodgname_buf_get_ptr(fgn, com), i); + break; + default: + error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td); } - error = dsw->d_ioctl(dev, com, ap->a_data, ap->a_fflag, td); - -out: dev_relthread(dev, ref); if (error == ENOIOCTL) error = ENOTTY; @@ -1370,6 +1396,8 @@ devfs_readdir(struct vop_readdir_args *ap) if (dp->d_reclen > uio->uio_resid) break; dp->d_fileno = de->de_inode; + /* NOTE: d_off is the offset for the *next* entry. */ + dp->d_off = off + dp->d_reclen; if (off >= uio->uio_offset) { error = vfs_read_dirent(ap, dp, off); if (error) diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 43eaea5c..8fcc314b 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/kernel.h> +#include <sys/epoch.h> #include <sys/exec.h> #include <sys/file.h> #include <sys/filedesc.h> @@ -538,6 +539,7 @@ proc0_init(void *dummy __unused) td->td_pflags = TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); td->td_domain.dr_policy = td->td_cpuset->cs_domain; + epoch_thread_init(td); prison0_init(); p->p_peers = 0; p->p_leader = p; diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c index 09cae9b2..f94eda5b 100644 --- a/freebsd/sys/kern/kern_mbuf.c +++ b/freebsd/sys/kern/kern_mbuf.c @@ -36,6 +36,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/conf.h> +#include <sys/domainset.h> #include <sys/malloc.h> #include <sys/systm.h> #include <sys/mbuf.h> @@ -423,8 +424,6 @@ nd_buf_import(void *arg, void **store, int count, int domain __unused, struct mbuf *m; int i; - KASSERT(!dumping, ("%s: ran out of pre-allocated mbufs", __func__)); - q = arg; for (i = 0; i < count; i++) { @@ -434,6 +433,8 @@ nd_buf_import(void *arg, void **store, int count, int domain __unused, trash_init(m, q == &nd_mbufq ? MSIZE : nd_clsize, flags); store[i] = m; } + KASSERT((flags & M_WAITOK) == 0 || i == count, + ("%s: ran out of pre-allocated mbufs", __func__)); return (i); } @@ -460,8 +461,6 @@ nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, void *clust; int i; - KASSERT(!dumping, ("%s: ran out of pre-allocated mbufs", __func__)); - for (i = 0; i < count; i++) { m = m_get(MT_DATA, M_NOWAIT); if (m == NULL) @@ -474,6 +473,8 @@ nd_pack_import(void *arg __unused, void **store, int count, int domain __unused, mb_ctor_clust(clust, nd_clsize, m, 0); store[i] = m; } + KASSERT((flags & M_WAITOK) == 0 || i == count, + ("%s: ran out of pre-allocated mbufs", __func__)); return (i); } @@ -605,8 +606,9 @@ mbuf_jumbo_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *flags, /* Inform UMA that this allocator uses kernel_map/object. */ *flags = UMA_SLAB_KERNEL; #ifndef __rtems__ - return ((void *)kmem_alloc_contig_domain(domain, bytes, wait, - (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, VM_MEMATTR_DEFAULT)); + return ((void *)kmem_alloc_contig_domainset(DOMAINSET_FIXED(domain), + bytes, wait, (vm_paddr_t)0, ~(vm_paddr_t)0, 1, 0, + VM_MEMATTR_DEFAULT)); #else /* __rtems__ */ return ((void *)malloc(bytes, M_TEMP, wait)); #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 3baea2e4..ea9c1821 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -1704,6 +1704,53 @@ retry: } /* + * Based on on sysctl_handle_int() convert microseconds to a sbintime. + */ +int +sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS) +{ + int error; + int64_t tt; + sbintime_t sb; + + tt = *(int64_t *)arg1; + sb = ustosbt(tt); + + error = sysctl_handle_64(oidp, &sb, 0, req); + if (error || !req->newptr) + return (error); + + tt = sbttous(sb); + *(int64_t *)arg1 = tt; + + return (0); +} + +/* + * Based on on sysctl_handle_int() convert milliseconds to a sbintime. + */ +int +sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS) +{ + int error; + int64_t tt; + sbintime_t sb; + + tt = *(int64_t *)arg1; + sb = mstosbt(tt); + + error = sysctl_handle_64(oidp, &sb, 0, req); + if (error || !req->newptr) + return (error); + + tt = sbttoms(sb); + *(int64_t *)arg1 = tt; + + return (0); +} + + +/* * Transfer functions to/from kernel space. * XXX: rather untested at this point */ diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c index e5b40e62..79a5a7b4 100644 --- a/freebsd/sys/kern/subr_blist.c +++ b/freebsd/sys/kern/subr_blist.c @@ -48,11 +48,11 @@ * upper bound on a potential allocation, but not necessarily a tight upper * bound. * - * The radix tree also implements two collapsed states for meta nodes: - * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is - * in either of these two states, all information contained underneath - * the node is considered stale. These states are used to optimize - * allocation and freeing operations. + * The bitmap field in each node directs the search for available blocks. + * For a leaf node, a bit is set if the corresponding block is free. For a + * meta node, a bit is set if the corresponding subtree contains a free + * block somewhere within it. The search at a meta node considers only + * children of that node that represent a range that includes a free block. * * The hinting greatly increases code efficiency for allocations while * the general radix structure optimizes both allocations and frees. The @@ -61,19 +61,19 @@ * * The blist code wires all necessary memory at creation time. Neither * allocations nor frees require interaction with the memory subsystem. - * The non-blocking features of the blist code are used in the swap code - * (vm/swap_pager.c). + * The non-blocking nature of allocations and frees is required by swap + * code (vm/swap_pager.c). * - * LAYOUT: The radix tree is laid out recursively using a - * linear array. Each meta node is immediately followed (laid out - * sequentially in memory) by BLIST_META_RADIX lower level nodes. This - * is a recursive structure but one that can be easily scanned through - * a very simple 'skip' calculation. In order to support large radixes, - * portions of the tree may reside outside our memory allocation. We - * handle this with an early-termination optimization (when bighint is - * set to -1) on the scan. The memory allocation is only large enough - * to cover the number of blocks requested at creation time even if it - * must be encompassed in larger root-node radix. + * LAYOUT: The radix tree is laid out recursively using a linear array. + * Each meta node is immediately followed (laid out sequentially in + * memory) by BLIST_META_RADIX lower level nodes. This is a recursive + * structure but one that can be easily scanned through a very simple + * 'skip' calculation. The memory allocation is only large enough to + * cover the number of blocks requested at creation time. Nodes that + * represent blocks beyond that limit, nodes that would never be read + * or written, are not allocated, so that the last of the + * BLIST_META_RADIX lower level nodes of a some nodes may not be + * allocated. * * NOTE: the allocator cannot currently allocate more than * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too @@ -107,6 +107,7 @@ __FBSDID("$FreeBSD$"); #define BLIST_DEBUG #endif +#include <sys/errno.h> #include <sys/types.h> #include <sys/malloc.h> #include <sys/sbuf.h> @@ -120,7 +121,7 @@ __FBSDID("$FreeBSD$"); #define bitcount64(x) __bitcount64((uint64_t)(x)) #define malloc(a,b,c) calloc(a, 1) #define free(a,b) free(a) -static __inline int imax(int a, int b) { return (a > b ? a : b); } +#define ummin(a,b) ((a) < (b) ? (a) : (b)) #include <sys/blist.h> @@ -181,6 +182,18 @@ radix_to_skip(daddr_t radix) } /* + * Provide a mask with count bits set, starting as position n. + */ +static inline u_daddr_t +bitrange(int n, int count) +{ + + return (((u_daddr_t)-1 << n) & + ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - (n + count)))); +} + + +/* * Use binary search, or a faster method, to find the 1 bit in a u_daddr_t. * Assumes that the argument has only one bit set. */ @@ -222,9 +235,7 @@ blist_t blist_create(daddr_t blocks, int flags) { blist_t bl; - daddr_t i, last_block; - u_daddr_t nodes, radix, skip; - int digit; + u_daddr_t nodes, radix; if (blocks == 0) panic("invalid block count"); @@ -232,30 +243,13 @@ blist_create(daddr_t blocks, int flags) /* * Calculate the radix and node count used for scanning. */ - last_block = blocks - 1; + nodes = 1; radix = BLIST_BMAP_RADIX; - while (radix < blocks) { - if (((last_block / radix + 1) & BLIST_META_MASK) != 0) - /* - * We must widen the blist to avoid partially - * filled nodes. - */ - last_block |= radix - 1; + while (radix <= blocks) { + nodes += 1 + (blocks - 1) / radix; radix *= BLIST_META_RADIX; } - /* - * Count the meta-nodes in the expanded tree, including the final - * terminator, from the bottom level up to the root. - */ - nodes = 1; - if (radix - blocks >= BLIST_BMAP_RADIX) - nodes++; - last_block /= BLIST_BMAP_RADIX; - while (last_block > 0) { - nodes += last_block + 1; - last_block /= BLIST_META_RADIX; - } bl = malloc(offsetof(struct blist, bl_root[nodes]), M_SWAP, flags | M_ZERO); if (bl == NULL) @@ -263,33 +257,6 @@ blist_create(daddr_t blocks, int flags) bl->bl_blocks = blocks; bl->bl_radix = radix; - bl->bl_cursor = 0; - - /* - * Initialize the empty tree by filling in root values, then initialize - * just the terminators in the rest of the tree. - */ - bl->bl_root[0].bm_bighint = 0; - if (radix == BLIST_BMAP_RADIX) - bl->bl_root[0].u.bmu_bitmap = 0; - else - bl->bl_root[0].u.bmu_avail = 0; - last_block = blocks - 1; - i = 0; - while (radix > BLIST_BMAP_RADIX) { - radix /= BLIST_META_RADIX; - skip = radix_to_skip(radix); - digit = last_block / radix; - i += 1 + digit * skip; - if (digit != BLIST_META_MASK) { - /* - * Add a terminator. - */ - bl->bl_root[i + skip].bm_bighint = (daddr_t)-1; - bl->bl_root[i + skip].u.bmu_bitmap = 0; - } - last_block %= radix; - } #if defined(BLIST_DEBUG) printf( @@ -323,6 +290,9 @@ blist_alloc(blist_t bl, daddr_t count) { daddr_t blk; + if (count > BLIST_MAX_ALLOC) + panic("allocation too large"); + /* * This loop iterates at most twice. An allocation failure in the * first iteration leads to a second iteration only if the cursor was @@ -333,12 +303,13 @@ blist_alloc(blist_t bl, daddr_t count) blk = blst_meta_alloc(bl->bl_root, bl->bl_cursor, count, bl->bl_radix); if (blk != SWAPBLK_NONE) { + bl->bl_avail -= count; bl->bl_cursor = blk + count; if (bl->bl_cursor == bl->bl_blocks) bl->bl_cursor = 0; return (blk); - } else if (bl->bl_cursor != 0) - bl->bl_cursor = 0; + } + bl->bl_cursor = 0; } return (SWAPBLK_NONE); } @@ -350,10 +321,7 @@ daddr_t blist_avail(blist_t bl) { - if (bl->bl_radix == BLIST_BMAP_RADIX) - return (bitcount64(bl->bl_root->u.bmu_bitmap)); - else - return (bl->bl_root->u.bmu_avail); + return (bl->bl_avail); } /* @@ -365,7 +333,10 @@ void blist_free(blist_t bl, daddr_t blkno, daddr_t count) { + if (blkno < 0 || blkno + count > bl->bl_blocks) + panic("freeing invalid range"); blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix); + bl->bl_avail += count; } /* @@ -377,8 +348,13 @@ blist_free(blist_t bl, daddr_t blkno, daddr_t count) daddr_t blist_fill(blist_t bl, daddr_t blkno, daddr_t count) { + daddr_t filled; - return (blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix)); + if (blkno < 0 || blkno + count > bl->bl_blocks) + panic("filling invalid range"); + filled = blst_meta_fill(bl->bl_root, blkno, count, bl->bl_radix); + bl->bl_avail -= filled; + return (filled); } /* @@ -416,8 +392,11 @@ blist_resize(blist_t *pbl, daddr_t count, int freenew, int flags) void blist_print(blist_t bl) { - printf("BLIST cursor = %08jx {\n", (uintmax_t)bl->bl_cursor); - blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); + printf("BLIST avail = %jd, cursor = %08jx {\n", + (uintmax_t)bl->bl_avail, (uintmax_t)bl->bl_cursor); + + if (bl->bl_root->bm_bitmap != 0) + blst_radix_print(bl->bl_root, 0, bl->bl_radix, 4); printf("}\n"); } @@ -571,16 +550,11 @@ blist_stats(blist_t bl, struct sbuf *s) * Check for skippable subtrees starting at i. */ while (radix > BLIST_BMAP_RADIX) { - if (bl->bl_root[nodes].u.bmu_avail == 0) { + if (bl->bl_root[nodes].bm_bitmap == 0) { if (gap_stats_counting(stats)) update_gap_stats(stats, i); break; } - if (bl->bl_root[nodes].u.bmu_avail == radix) { - if (!gap_stats_counting(stats)) - update_gap_stats(stats, i); - break; - } /* * Skip subtree root. @@ -592,7 +566,7 @@ blist_stats(blist_t bl, struct sbuf *s) /* * Scan leaf. */ - mask = bl->bl_root[nodes].u.bmu_bitmap; + mask = bl->bl_root[nodes].bm_bitmap; diff = mask ^ (mask << 1); if (gap_stats_counting(stats)) diff ^= 1; @@ -620,7 +594,57 @@ blist_stats(blist_t bl, struct sbuf *s) */ /* - * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). + * BLST_NEXT_LEAF_ALLOC() - allocate the first few blocks in the next leaf. + * + * 'scan' is a leaf node, associated with a block containing 'blk'. + * The next leaf node could be adjacent, or several nodes away if the + * least common ancestor of 'scan' and its neighbor is several levels + * up. Use 'blk' to determine how many meta-nodes lie between the + * leaves. If the next leaf has enough initial bits set, clear them + * and clear the bits in the meta nodes on the path up to the least + * common ancestor to mark any subtrees made completely empty. + */ +static int +blst_next_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) +{ + blmeta_t *next; + daddr_t skip; + u_daddr_t radix; + int digit; + + next = scan + 1; + blk += BLIST_BMAP_RADIX; + radix = BLIST_BMAP_RADIX; + while ((digit = ((blk / radix) & BLIST_META_MASK)) == 0 && + (next->bm_bitmap & 1) == 1) { + next++; + radix *= BLIST_META_RADIX; + } + if (((next->bm_bitmap + 1) & ~((u_daddr_t)-1 << count)) != 0) { + /* + * The next leaf doesn't have enough free blocks at the + * beginning to complete the spanning allocation. + */ + return (ENOMEM); + } + /* Clear the first 'count' bits in the next leaf to allocate. */ + next->bm_bitmap &= (u_daddr_t)-1 << count; + + /* + * Update bitmaps of next-ancestors, up to least common ancestor. + */ + skip = radix_to_skip(radix); + while (radix != BLIST_BMAP_RADIX && next->bm_bitmap == 0) { + (--next)->bm_bitmap ^= 1; + radix /= BLIST_META_RADIX; + } + if (next->bm_bitmap == 0) + scan[-digit * skip].bm_bitmap ^= (u_daddr_t)1 << digit; + return (0); +} + +/* + * BLST_LEAF_ALLOC() - allocate at a leaf in the radix tree (a bitmap). * * This is the core of the allocator and is optimized for the * BLIST_BMAP_RADIX block allocation case. Otherwise, execution @@ -635,15 +659,15 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) range1 = 0; count1 = count - 1; num_shifts = fls(count1); - mask = scan->u.bmu_bitmap; + mask = scan->bm_bitmap; while ((-mask & ~mask) != 0 && num_shifts > 0) { /* * If bit i is set in mask, then bits in [i, i+range1] are set - * in scan->u.bmu_bitmap. The value of range1 is equal to + * in scan->bm_bitmap. The value of range1 is equal to * count1 >> num_shifts. Grow range and reduce num_shifts to 0, * while preserving these invariants. The updates to mask leave * fewer bits set, but each bit that remains set represents a - * longer string of consecutive bits set in scan->u.bmu_bitmap. + * longer string of consecutive bits set in scan->bm_bitmap. * If more updates to mask cannot clear more bits, because mask * is partitioned with all 0 bits preceding all 1 bits, the loop * terminates immediately. @@ -687,31 +711,14 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) * An allocation within this leaf is impossible, so a successful * allocation depends on the next leaf providing some of the blocks. */ - if (((blk / BLIST_BMAP_RADIX + 1) & BLIST_META_MASK) == 0) { - /* - * The next leaf has a different meta-node parent, so it - * is not necessarily initialized. Update bighint, - * comparing the range found at the end of mask to the - * largest earlier range that could have been made to - * vanish in the initial processing of mask. - */ - scan->bm_bighint = imax(BLIST_BMAP_RADIX - lo, range1); - return (SWAPBLK_NONE); - } - hi -= BLIST_BMAP_RADIX; - if (((scan[1].u.bmu_bitmap + 1) & ~((u_daddr_t)-1 << hi)) != 0) { + if (blst_next_leaf_alloc(scan, blk, hi - BLIST_BMAP_RADIX) != 0) /* - * The next leaf doesn't have enough free blocks at the - * beginning to complete the spanning allocation. The - * hint cannot be updated, because the same allocation - * request could be satisfied later, by this leaf, if - * the state of the next leaf changes, and without any - * changes to this leaf. + * The hint cannot be updated, because the same + * allocation request could be satisfied later, by this + * leaf, if the state of the next leaf changes, and + * without any changes to this leaf. */ return (SWAPBLK_NONE); - } - /* Clear the first 'hi' bits in the next leaf, allocating them. */ - scan[1].u.bmu_bitmap &= (u_daddr_t)-1 << hi; hi = BLIST_BMAP_RADIX; } @@ -726,12 +733,9 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) } else { /* Clear the bits of mask at position 'hi' and higher. */ mask &= (u_daddr_t)-1 >> (BLIST_BMAP_RADIX - hi); - /* If this allocation uses all the bits, clear the hint. */ - if (mask == scan->u.bmu_bitmap) - scan->bm_bighint = 0; } /* Clear the allocated bits from this leaf. */ - scan->u.bmu_bitmap &= ~mask; + scan->bm_bitmap &= ~mask; return ((blk & ~BLIST_BMAP_MASK) + lo); } @@ -746,81 +750,61 @@ blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count) static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t cursor, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, next_skip, r, skip; - int child; + daddr_t blk, i, r, skip; + u_daddr_t bit, mask; bool scan_from_start; + int digit; if (radix == BLIST_BMAP_RADIX) return (blst_leaf_alloc(scan, cursor, count)); - if (scan->u.bmu_avail < count) { - /* - * The meta node's hint must be too large if the allocation - * exceeds the number of free blocks. Reduce the hint, and - * return failure. - */ - scan->bm_bighint = scan->u.bmu_avail; - return (SWAPBLK_NONE); - } blk = cursor & -radix; + scan_from_start = (cursor == blk); + radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; + mask = scan->bm_bitmap; + + /* Discard any candidates that appear before cursor. */ + digit = (cursor / radix) & BLIST_META_MASK; + mask &= (u_daddr_t)-1 << digit; /* - * An ALL-FREE meta node requires special handling before allocating - * any of its blocks. + * If the first try is for a block that includes the cursor, pre-undo + * the digit * radix offset in the first call; otherwise, ignore the + * cursor entirely. */ - if (scan->u.bmu_avail == radix) { - radix /= BLIST_META_RADIX; - - /* - * Reinitialize each of the meta node's children. An ALL-FREE - * meta node cannot have a terminator in any subtree. - */ - for (i = 1; i < skip; i += next_skip) { - if (next_skip == 1) - scan[i].u.bmu_bitmap = (u_daddr_t)-1; - else - scan[i].u.bmu_avail = radix; - scan[i].bm_bighint = radix; - } - } else { - radix /= BLIST_META_RADIX; - } + if (((mask >> digit) & 1) == 1) + cursor -= digit * radix; + else + cursor = blk; - if (count > radix) { - /* - * The allocation exceeds the number of blocks that are - * managed by a subtree of this meta node. - */ - panic("allocation too large"); - } - scan_from_start = cursor == blk; - child = (cursor - blk) / radix; - blk += child * radix; - for (i = 1 + child * next_skip; i < skip; i += next_skip) { + /* + * Examine the nonempty subtree associated with each bit set in mask. + */ + do { + bit = mask & -mask; + digit = bitpos(bit); + i = 1 + digit * skip; if (count <= scan[i].bm_bighint) { /* * The allocation might fit beginning in the i'th subtree. */ - r = blst_meta_alloc(&scan[i], - cursor > blk ? cursor : blk, count, radix); + r = blst_meta_alloc(&scan[i], cursor + digit * radix, + count, radix); if (r != SWAPBLK_NONE) { - scan->u.bmu_avail -= count; + if (scan[i].bm_bitmap == 0) + scan->bm_bitmap ^= bit; return (r); } - } else if (scan[i].bm_bighint == (daddr_t)-1) { - /* - * Terminator - */ - break; } - blk += radix; - } + cursor = blk; + } while ((mask ^= bit) != 0); /* - * We couldn't allocate count in this subtree, update bighint. + * We couldn't allocate count in this subtree. If the whole tree was + * scanned, and the last tree node is allocated, update bighint. */ - if (scan_from_start && scan->bm_bighint >= count) + if (scan_from_start && !(digit == BLIST_META_RADIX - 1 && + scan[i].bm_bighint == BLIST_MAX_ALLOC)) scan->bm_bighint = count - 1; return (SWAPBLK_NONE); @@ -834,7 +818,6 @@ static void blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) { u_daddr_t mask; - int n; /* * free some data in this bitmap @@ -842,20 +825,10 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) * \_________/\__/ * count n */ - n = blk & BLIST_BMAP_MASK; - mask = ((u_daddr_t)-1 << n) & - ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); - if (scan->u.bmu_bitmap & mask) + mask = bitrange(blk & BLIST_BMAP_MASK, count); + if (scan->bm_bitmap & mask) panic("freeing free block"); - scan->u.bmu_bitmap |= mask; - - /* - * We could probably do a better job here. We are required to make - * bighint at least as large as the biggest contiguous block of - * data. If we just shoehorn it, a little extra overhead will - * be incured on the next allocation (but only that one typically). - */ - scan->bm_bighint = BLIST_BMAP_RADIX; + scan->bm_bitmap |= mask; } /* @@ -871,79 +844,37 @@ blst_leaf_free(blmeta_t *scan, daddr_t blk, int count) static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, next_skip, skip, v; - int child; - - if (scan->bm_bighint == (daddr_t)-1) - panic("freeing invalid range"); - if (radix == BLIST_BMAP_RADIX) - return (blst_leaf_free(scan, freeBlk, count)); - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; - - if (scan->u.bmu_avail == 0) { - /* - * ALL-ALLOCATED special case, with possible - * shortcut to ALL-FREE special case. - */ - scan->u.bmu_avail = count; - scan->bm_bighint = count; - - if (count != radix) { - for (i = 1; i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) - break; - scan[i].bm_bighint = 0; - if (next_skip == 1) { - scan[i].u.bmu_bitmap = 0; - } else { - scan[i].u.bmu_avail = 0; - } - } - /* fall through */ - } - } else { - scan->u.bmu_avail += count; - /* scan->bm_bighint = radix; */ - } + daddr_t blk, endBlk, i, skip; + int digit, endDigit; /* - * ALL-FREE special case. + * We could probably do a better job here. We are required to make + * bighint at least as large as the biggest allocable block of data. + * If we just shoehorn it, a little extra overhead will be incurred + * on the next allocation (but only that one typically). */ + scan->bm_bighint = BLIST_MAX_ALLOC; - if (scan->u.bmu_avail == radix) - return; - if (scan->u.bmu_avail > radix) - panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld", - (long long)count, (long long)scan->u.bmu_avail, - (long long)radix); - - /* - * Break the free down into its components - */ + if (radix == BLIST_BMAP_RADIX) + return (blst_leaf_free(scan, freeBlk, count)); - blk = freeBlk & -radix; + endBlk = ummin(freeBlk + count, (freeBlk + radix) & -radix); radix /= BLIST_META_RADIX; - - child = (freeBlk - blk) / radix; - blk += child * radix; - i = 1 + child * next_skip; - while (i < skip && blk < freeBlk + count) { - v = blk + radix - freeBlk; - if (v > count) - v = count; - blst_meta_free(&scan[i], freeBlk, v, radix); - if (scan->bm_bighint < scan[i].bm_bighint) - scan->bm_bighint = scan[i].bm_bighint; - count -= v; - freeBlk += v; + skip = radix_to_skip(radix); + blk = freeBlk & -radix; + digit = (blk / radix) & BLIST_META_MASK; + endDigit = 1 + (((endBlk - 1) / radix) & BLIST_META_MASK); + scan->bm_bitmap |= bitrange(digit, endDigit - digit); + for (i = 1 + digit * skip; blk < endBlk; i += skip) { blk += radix; - i += next_skip; + count = ummin(blk, endBlk) - freeBlk; + blst_meta_free(&scan[i], freeBlk, count, radix); + freeBlk = blk; } } /* - * BLIST_RADIX_COPY() - copy one radix tree to another + * BLST_COPY() - copy one radix tree to another * * Locates free space in the source tree and frees it in the destination * tree. The space may not already be free in the destination. @@ -952,21 +883,21 @@ static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, daddr_t count) { - daddr_t i, next_skip, skip; + daddr_t endBlk, i, skip; /* * Leaf node */ if (radix == BLIST_BMAP_RADIX) { - u_daddr_t v = scan->u.bmu_bitmap; + u_daddr_t v = scan->bm_bitmap; if (v == (u_daddr_t)-1) { blist_free(dest, blk, count); } else if (v != 0) { int i; - for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) { + for (i = 0; i < count; ++i) { if (v & ((u_daddr_t)1 << i)) blist_free(dest, blk + i, 1); } @@ -978,42 +909,22 @@ blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix, blist_t dest, * Meta node */ - if (scan->u.bmu_avail == 0) { + if (scan->bm_bitmap == 0) { /* * Source all allocated, leave dest allocated */ return; } - if (scan->u.bmu_avail == radix) { - /* - * Source all free, free entire dest - */ - if (count < radix) - blist_free(dest, blk, count); - else - blist_free(dest, blk, radix); - return; - } - - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; + endBlk = blk + count; radix /= BLIST_META_RADIX; - - for (i = 1; count && i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) - break; - - if (count >= radix) { - blst_copy(&scan[i], blk, radix, dest, radix); - count -= radix; - } else { - if (count) { - blst_copy(&scan[i], blk, radix, dest, count); - } - count = 0; - } + skip = radix_to_skip(radix); + for (i = 1; blk < endBlk; i += skip) { blk += radix; + count = radix; + if (blk >= endBlk) + count -= blk - endBlk; + blst_copy(&scan[i], blk - radix, radix, dest, count); } } @@ -1029,16 +940,13 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) { daddr_t nblks; u_daddr_t mask; - int n; - n = blk & BLIST_BMAP_MASK; - mask = ((u_daddr_t)-1 << n) & - ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n)); + mask = bitrange(blk & BLIST_BMAP_MASK, count); /* Count the number of blocks that we are allocating. */ - nblks = bitcount64(scan->u.bmu_bitmap & mask); + nblks = bitcount64(scan->bm_bitmap & mask); - scan->u.bmu_bitmap &= ~mask; + scan->bm_bitmap &= ~mask; return (nblks); } @@ -1053,70 +961,27 @@ blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count) static daddr_t blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) { - daddr_t blk, i, nblks, next_skip, skip, v; - int child; + daddr_t blk, endBlk, i, nblks, skip; + int digit; - if (scan->bm_bighint == (daddr_t)-1) - panic("filling invalid range"); - if (count > radix) { - /* - * The allocation exceeds the number of blocks that are - * managed by this node. - */ - panic("fill too large"); - } if (radix == BLIST_BMAP_RADIX) return (blst_leaf_fill(scan, allocBlk, count)); - if (count == radix || scan->u.bmu_avail == 0) { - /* - * ALL-ALLOCATED special case - */ - nblks = scan->u.bmu_avail; - scan->u.bmu_avail = 0; - scan->bm_bighint = 0; - return (nblks); - } + + endBlk = ummin(allocBlk + count, (allocBlk + radix) & -radix); + radix /= BLIST_META_RADIX; skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; blk = allocBlk & -radix; - - /* - * An ALL-FREE meta node requires special handling before allocating - * any of its blocks. - */ - if (scan->u.bmu_avail == radix) { - radix /= BLIST_META_RADIX; - - /* - * Reinitialize each of the meta node's children. An ALL-FREE - * meta node cannot have a terminator in any subtree. - */ - for (i = 1; i < skip; i += next_skip) { - if (next_skip == 1) - scan[i].u.bmu_bitmap = (u_daddr_t)-1; - else - scan[i].u.bmu_avail = radix; - scan[i].bm_bighint = radix; - } - } else { - radix /= BLIST_META_RADIX; - } - nblks = 0; - child = (allocBlk - blk) / radix; - blk += child * radix; - i = 1 + child * next_skip; - while (i < skip && blk < allocBlk + count) { - v = blk + radix - allocBlk; - if (v > count) - v = count; - nblks += blst_meta_fill(&scan[i], allocBlk, v, radix); - count -= v; - allocBlk += v; + while (blk < endBlk) { + digit = (blk / radix) & BLIST_META_MASK; + i = 1 + digit * skip; blk += radix; - i += next_skip; + count = ummin(blk, endBlk) - allocBlk; + nblks += blst_meta_fill(&scan[i], allocBlk, count, radix); + if (scan[i].bm_bitmap == 0) + scan->bm_bitmap &= ~((u_daddr_t)1 << digit); + allocBlk = blk; } - scan->u.bmu_avail -= nblks; return (nblks); } @@ -1125,64 +990,44 @@ blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count, u_daddr_t radix) static void blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) { - daddr_t i, next_skip, skip; + daddr_t skip; + u_daddr_t bit, mask; + int digit; if (radix == BLIST_BMAP_RADIX) { printf( - "%*.*s(%08llx,%lld): bitmap %016llx big=%lld\n", + "%*.*s(%08llx,%lld): bitmap %0*llx big=%lld\n", tab, tab, "", (long long)blk, (long long)radix, - (long long)scan->u.bmu_bitmap, + 1 + (BLIST_BMAP_RADIX - 1) / 4, + (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); return; } - if (scan->u.bmu_avail == 0) { - printf( - "%*.*s(%08llx,%lld) ALL ALLOCATED\n", - tab, tab, "", - (long long)blk, - (long long)radix - ); - return; - } - if (scan->u.bmu_avail == radix) { - printf( - "%*.*s(%08llx,%lld) ALL FREE\n", - tab, tab, "", - (long long)blk, - (long long)radix - ); - return; - } - printf( - "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n", + "%*.*s(%08llx): subtree (%lld/%lld) bitmap %0*llx big=%lld {\n", tab, tab, "", (long long)blk, (long long)radix, - (long long)scan->u.bmu_avail, (long long)radix, + 1 + (BLIST_META_RADIX - 1) / 4, + (long long)scan->bm_bitmap, (long long)scan->bm_bighint ); - skip = radix_to_skip(radix); - next_skip = skip / BLIST_META_RADIX; radix /= BLIST_META_RADIX; + skip = radix_to_skip(radix); tab += 4; - for (i = 1; i < skip; i += next_skip) { - if (scan[i].bm_bighint == (daddr_t)-1) { - printf( - "%*.*s(%08llx,%lld): Terminator\n", - tab, tab, "", - (long long)blk, (long long)radix - ); - break; - } - blst_radix_print(&scan[i], blk, radix, tab); - blk += radix; - } + mask = scan->bm_bitmap; + /* Examine the nonempty subtree associated with each bit set in mask */ + do { + bit = mask & -mask; + digit = bitpos(bit); + blst_radix_print(&scan[1 + digit * skip], blk + digit * radix, + radix, tab); + } while ((mask ^= bit) != 0); tab -= 4; printf( @@ -1198,7 +1043,7 @@ blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int tab) int main(int ac, char **av) { - int size = 1024; + int size = BLIST_META_RADIX * BLIST_BMAP_RADIX; int i; blist_t bl; struct sbuf *s; diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c index e56e90d7..3f80cd2c 100644 --- a/freebsd/sys/kern/subr_gtaskqueue.c +++ b/freebsd/sys/kern/subr_gtaskqueue.c @@ -56,6 +56,8 @@ __FBSDID("$FreeBSD$"); static MALLOC_DEFINE(M_GTASKQUEUE, "gtaskqueue", "Group Task Queues"); static void gtaskqueue_thread_enqueue(void *); static void gtaskqueue_thread_loop(void *arg); +static int task_is_running(struct gtaskqueue *queue, struct gtask *gtask); +static void gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask); TASKQGROUP_DEFINE(softirq, mp_ncpus, 1); TASKQGROUP_DEFINE(config, 1, 1); @@ -188,6 +190,44 @@ gtaskqueue_free(struct gtaskqueue *queue) free(queue, M_GTASKQUEUE); } +/* + * Wait for all to complete, then prevent it from being enqueued + */ +void +grouptask_block(struct grouptask *grouptask) +{ + struct gtaskqueue *queue = grouptask->gt_taskqueue; + struct gtask *gtask = &grouptask->gt_task; + +#ifdef INVARIANTS + if (queue == NULL) { + gtask_dump(gtask); + panic("queue == NULL"); + } +#endif + TQ_LOCK(queue); + gtask->ta_flags |= TASK_NOENQUEUE; + gtaskqueue_drain_locked(queue, gtask); + TQ_UNLOCK(queue); +} + +void +grouptask_unblock(struct grouptask *grouptask) +{ + struct gtaskqueue *queue = grouptask->gt_taskqueue; + struct gtask *gtask = &grouptask->gt_task; + +#ifdef INVARIANTS + if (queue == NULL) { + gtask_dump(gtask); + panic("queue == NULL"); + } +#endif + TQ_LOCK(queue); + gtask->ta_flags &= ~TASK_NOENQUEUE; + TQ_UNLOCK(queue); +} + int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) { @@ -202,6 +242,10 @@ grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *gtask) TQ_UNLOCK(queue); return (0); } + if (gtask->ta_flags & TASK_NOENQUEUE) { + TQ_UNLOCK(queue); + return (EAGAIN); + } STAILQ_INSERT_TAIL(&queue->tq_queue, gtask, ta_link); gtask->ta_flags |= TASK_ENQUEUED; TQ_UNLOCK(queue); @@ -383,6 +427,13 @@ gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask) return (error); } +static void +gtaskqueue_drain_locked(struct gtaskqueue *queue, struct gtask *gtask) +{ + while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) + TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); +} + void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) { @@ -391,8 +442,7 @@ gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *gtask) WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); TQ_LOCK(queue); - while ((gtask->ta_flags & TASK_ENQUEUED) || task_is_running(queue, gtask)) - TQ_SLEEP(queue, gtask, &queue->tq_mutex, PWAIT, "-", 0); + gtaskqueue_drain_locked(queue, gtask); TQ_UNLOCK(queue); } @@ -857,6 +907,7 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) { int i; + grouptask_block(gtask); mtx_lock(&qgroup->tqg_lock); for (i = 0; i < qgroup->tqg_cnt; i++) if (qgroup->tqg_queue[i].tgc_taskq == gtask->gt_taskqueue) @@ -867,6 +918,7 @@ taskqgroup_detach(struct taskqgroup *qgroup, struct grouptask *gtask) LIST_REMOVE(gtask, gt_list); mtx_unlock(&qgroup->tqg_lock); gtask->gt_taskqueue = NULL; + gtask->gt_task.ta_flags &= ~TASK_NOENQUEUE; } static void diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index b0da68a8..70638ad0 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -1366,16 +1366,15 @@ sys_poll(struct thread *td, struct poll_args *uap) } int -kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, +kern_poll(struct thread *td, struct pollfd *ufds, u_int nfds, struct timespec *tsp, sigset_t *uset) { - struct pollfd *bits; - struct pollfd smallbits[32]; + struct pollfd *kfds; + struct pollfd stackfds[32]; sbintime_t sbt, precision, tmp; time_t over; struct timespec ts; int error; - size_t ni; precision = 0; if (tsp != NULL) { @@ -1402,18 +1401,24 @@ kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, } else sbt = -1; + /* + * This is kinda bogus. We have fd limits, but that is not + * really related to the size of the pollfd array. Make sure + * we let the process use at least FD_SETSIZE entries and at + * least enough for the system-wide limits. We want to be reasonably + * safe, but not overly restrictive. + */ #ifndef __rtems__ if (nfds > maxfilesperproc && nfds > FD_SETSIZE) #else /* __rtems__ */ if (nfds > rtems_libio_number_iops) #endif /* __rtems__ */ return (EINVAL); - ni = nfds * sizeof(struct pollfd); - if (ni > sizeof(smallbits)) - bits = malloc(ni, M_TEMP, M_WAITOK); + if (nfds > nitems(stackfds)) + kfds = mallocarray(nfds, sizeof(*kfds), M_TEMP, M_WAITOK); else - bits = smallbits; - error = copyin(fds, bits, ni); + kfds = stackfds; + error = copyin(ufds, kfds, nfds * sizeof(*kfds)); if (error) goto done; @@ -1438,7 +1443,7 @@ kern_poll(struct thread *td, struct pollfd *fds, u_int nfds, seltdinit(td); /* Iterate until the timeout expires or descriptors become ready. */ for (;;) { - error = pollscan(td, bits, nfds); + error = pollscan(td, kfds, nfds); if (error || td->td_retval[0] != 0) break; error = seltdwait(td, sbt, precision); @@ -1457,13 +1462,13 @@ done: if (error == EWOULDBLOCK) error = 0; if (error == 0) { - error = pollout(td, bits, fds, nfds); + error = pollout(td, kfds, ufds, nfds); if (error) goto out; } out: - if (ni > sizeof(smallbits)) - free(bits, M_TEMP); + if (nfds > nitems(stackfds)) + free(kfds, M_TEMP); return (error); } #ifdef __rtems__ diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c index f562b38f..b6616271 100755 --- a/freebsd/sys/kern/sys_pipe.c +++ b/freebsd/sys/kern/sys_pipe.c @@ -593,9 +593,8 @@ retry: #ifndef __rtems__ buffer = (caddr_t) vm_map_min(pipe_map); - error = vm_map_find(pipe_map, NULL, 0, - (vm_offset_t *) &buffer, size, 0, VMFS_ANY_SPACE, - VM_PROT_ALL, VM_PROT_ALL, 0); + error = vm_map_find(pipe_map, NULL, 0, (vm_offset_t *)&buffer, size, 0, + VMFS_ANY_SPACE, VM_PROT_RW, VM_PROT_RW, 0); if (error != KERN_SUCCESS) { #else /* __rtems__ */ (void)error; diff --git a/freebsd/sys/kern/tty.c b/freebsd/sys/kern/tty.c index f4a2b01f..aa9734cf 100644 --- a/freebsd/sys/kern/tty.c +++ b/freebsd/sys/kern/tty.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_capsicum.h> +#include <rtems/bsd/local/opt_printf.h> #include <sys/param.h> #include <sys/capsicum.h> @@ -108,6 +109,12 @@ SYSCTL_INT(_kern, OID_AUTO, tty_drainwait, CTLFLAG_RWTUN, #define TTYBUF_MAX 65536 +#ifdef PRINTF_BUFR_SIZE +#define TTY_PRBUF_SIZE PRINTF_BUFR_SIZE +#else +#define TTY_PRBUF_SIZE 256 +#endif + /* * Allocate buffer space if necessary, and set low watermarks, based on speed. * Note that the ttyxxxq_setsize() functions may drop and then reacquire the tty @@ -1065,7 +1072,9 @@ tty_alloc_mutex(struct ttydevsw *tsw, void *sc, struct mtx *mutex) PATCH_FUNC(busy); #undef PATCH_FUNC - tp = malloc(sizeof(struct tty), M_TTY, M_WAITOK|M_ZERO); + tp = malloc(sizeof(struct tty) + TTY_PRBUF_SIZE, M_TTY, + M_WAITOK | M_ZERO); + tp->t_prbufsz = TTY_PRBUF_SIZE; tp->t_devsw = tsw; tp->t_devswsoftc = sc; tp->t_flags = tsw->tsw_flags; diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index 992df712..f338629e 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -90,10 +90,12 @@ struct getsockaddr_sockaddr { char data[SOCK_MAXADDRLEN - sizeof(struct sockaddr)]; } __aligned(sizeof(long)); -static int getsockaddr(struct sockaddr **, caddr_t, size_t); +static int getsockaddr(struct sockaddr **, const struct sockaddr *, size_t); static int kern_getsockname(struct thread *, int, struct sockaddr **, socklen_t *); static int kern_listen(struct thread *, int, int); +static int kern_setsockopt(struct thread *, int, int, int, const void *, + enum uio_seg, socklen_t); static int kern_shutdown(struct thread *, int, int); static int kern_socket(struct thread *, int, int, int); static int kern_socketpair(struct thread *, int, int, int, int *); @@ -287,7 +289,7 @@ bind(int socket, const struct sockaddr *address, socklen_t address_len) struct thread *td = rtems_bsd_get_curthread_or_null(); struct bind_args ua = { .s = socket, - .name = (caddr_t) address, + .name = address, .namelen = address_len }; int error; @@ -363,6 +365,9 @@ sys_bindat(struct thread *td, struct bindat_args *uap) } #endif /* __rtems__ */ +#ifdef __rtems__ +static +#endif /* __rtems__ */ int sys_listen(struct thread *td, struct listen_args *uap) { @@ -681,7 +686,7 @@ connect(int socket, const struct sockaddr *address, socklen_t address_len) struct thread *td = rtems_bsd_get_curthread_or_null(); struct connect_args ua = { .s = socket, - .name = (caddr_t) address, + .name = address, .namelen = address_len }; int error; @@ -1111,7 +1116,7 @@ sys_sendto(struct thread *td, struct sendto_args *uap) struct msghdr msg; struct iovec aiov; - msg.msg_name = uap->to; + msg.msg_name = __DECONST(void *, uap->to); msg.msg_namelen = uap->tolen; msg.msg_iov = &aiov; msg.msg_iovlen = 1; @@ -1119,7 +1124,7 @@ sys_sendto(struct thread *td, struct sendto_args *uap) #ifdef COMPAT_OLDSOCK msg.msg_flags = 0; #endif - aiov.iov_base = uap->buf; + aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; return (sendit(td, uap->s, &msg, uap->flags)); } @@ -1134,7 +1139,7 @@ sendto(int socket, const void *message, size_t length, int flags, .buf = (caddr_t) message, .len = length, .flags = flags, - .to = (caddr_t) dest_addr, + .to = dest_addr, .tolen = dest_len }; int error; @@ -1189,7 +1194,7 @@ osend(struct thread *td, struct osend_args *uap) msg.msg_namelen = 0; msg.msg_iov = &aiov; msg.msg_iovlen = 1; - aiov.iov_base = uap->buf; + aiov.iov_base = __DECONST(void *, uap->buf); aiov.iov_len = uap->len; msg.msg_control = 0; msg.msg_flags = 0; @@ -1669,9 +1674,6 @@ shutdown(int socket, int how) #endif /* __rtems__ */ #ifdef __rtems__ -static int kern_setsockopt( struct thread *td, int s, int level, int name, - void *val, enum uio_seg valseg, socklen_t valsize); - static #endif /* __rtems__ */ int @@ -1707,7 +1709,7 @@ setsockopt(int socket, int level, int option_name, const void *option_value, #endif /* __rtems__ */ int -kern_setsockopt(struct thread *td, int s, int level, int name, void *val, +kern_setsockopt(struct thread *td, int s, int level, int name, const void *val, enum uio_seg valseg, socklen_t valsize) { struct socket *so; @@ -1723,7 +1725,7 @@ kern_setsockopt(struct thread *td, int s, int level, int name, void *val, sopt.sopt_dir = SOPT_SET; sopt.sopt_level = level; sopt.sopt_name = name; - sopt.sopt_val = val; + sopt.sopt_val = __DECONST(void *, val); sopt.sopt_valsize = valsize; switch (valseg) { case UIO_USERSPACE: @@ -2111,7 +2113,7 @@ sockargs(struct mbuf **mp, char *buf, socklen_t buflen, int type) } int -getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) +getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, size_t len) { struct sockaddr *sa; #ifndef __rtems__ diff --git a/freebsd/sys/net/if.c b/freebsd/sys/net/if.c index 7721af11..d846482e 100644 --- a/freebsd/sys/net/if.c +++ b/freebsd/sys/net/if.c @@ -1131,6 +1131,9 @@ if_detach_internal(struct ifnet *ifp, int vmove, struct if_clone **ifcp) * the work top-down for us. */ if (shutdown) { + /* Give interface users the chance to clean up. */ + EVENTHANDLER_INVOKE(ifnet_departure_event, ifp); + /* * In case of a vmove we are done here without error. * If we would signal an error it would lead to the same @@ -1773,35 +1776,29 @@ if_data_copy(struct ifnet *ifp, struct if_data *ifd) void if_addr_rlock(struct ifnet *ifp) { - MPASS(*(uint64_t *)&ifp->if_addr_et == 0); - epoch_enter_preempt(net_epoch_preempt, &ifp->if_addr_et); + + epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_addr_runlock(struct ifnet *ifp) { - epoch_exit_preempt(net_epoch_preempt, &ifp->if_addr_et); -#ifdef INVARIANTS - bzero(&ifp->if_addr_et, sizeof(struct epoch_tracker)); -#endif + + epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_rlock(if_t ifp) { - MPASS(*(uint64_t *)&ifp->if_maddr_et == 0); - epoch_enter_preempt(net_epoch_preempt, &ifp->if_maddr_et); + epoch_enter_preempt(net_epoch_preempt, curthread->td_et); } void if_maddr_runlock(if_t ifp) { - epoch_exit_preempt(net_epoch_preempt, &ifp->if_maddr_et); -#ifdef INVARIANTS - bzero(&ifp->if_maddr_et, sizeof(struct epoch_tracker)); -#endif + epoch_exit_preempt(net_epoch_preempt, curthread->td_et); } /* diff --git a/freebsd/sys/net/if_ethersubr.c b/freebsd/sys/net/if_ethersubr.c index 01e757e5..96ed309a 100644 --- a/freebsd/sys/net/if_ethersubr.c +++ b/freebsd/sys/net/if_ethersubr.c @@ -477,6 +477,26 @@ ether_output_frame(struct ifnet *ifp, struct mbuf *m) return (0); } +#ifdef EXPERIMENTAL +#if defined(INET6) && defined(INET) + /* draft-ietf-6man-ipv6only-flag */ + /* Catch ETHERTYPE_IP, and ETHERTYPE_ARP if we are v6-only. */ + if ((ND_IFINFO(ifp)->flags & ND6_IFF_IPV6_ONLY) != 0) { + struct ether_header *eh; + + eh = mtod(m, struct ether_header *); + switch (ntohs(eh->ether_type)) { + case ETHERTYPE_IP: + case ETHERTYPE_ARP: + m_freem(m); + return (EAFNOSUPPORT); + /* NOTREACHED */ + break; + }; + } +#endif +#endif + /* * Queue message on interface, update output statistics if * successful, and start output if interface not yet active. diff --git a/freebsd/sys/net/if_ipsec.c b/freebsd/sys/net/if_ipsec.c index 08465911..7cc2c961 100644 --- a/freebsd/sys/net/if_ipsec.c +++ b/freebsd/sys/net/if_ipsec.c @@ -220,6 +220,12 @@ ipsec_clone_destroy(struct ifnet *ifp) sx_xlock(&ipsec_ioctl_sx); sc = ifp->if_softc; ipsec_delete_tunnel(sc); + /* + * Delete softc from idhash on interface destroy, since + * ipsec_delete_tunnel() keeps reqid unchanged. + */ + if (sc->reqid != 0) + CK_LIST_REMOVE(sc, idhash); bpfdetach(ifp); if_detach(ifp); ifp->if_softc = NULL; @@ -273,6 +279,13 @@ vnet_ipsec_uninit(const void *unused __unused) if_clone_detach(V_ipsec_cloner); free(V_ipsec_idhtbl, M_IPSEC); + /* + * Use V_ipsec_idhtbl pointer as indicator that VNET is going to be + * destroyed, it is used by ipsec_srcaddr() callback. + */ + V_ipsec_idhtbl = NULL; + IPSEC_WAIT(); + #ifdef INET if (IS_DEFAULT_VNET(curvnet)) ip_encap_unregister_srcaddr(ipsec4_srctab); @@ -785,6 +798,10 @@ ipsec_srcaddr(void *arg __unused, const struct sockaddr *sa, struct ipsec_softc *sc; struct secasindex *saidx; + /* Check that VNET is ready */ + if (V_ipsec_idhtbl == NULL) + return; + MPASS(in_epoch(net_epoch_preempt)); CK_LIST_FOREACH(sc, ipsec_srchash(sa), srchash) { if (sc->family == 0) @@ -1031,13 +1048,11 @@ ipsec_delete_tunnel(struct ipsec_softc *sc) sc->ifp->if_drv_flags &= ~IFF_DRV_RUNNING; if (sc->family != 0) { CK_LIST_REMOVE(sc, srchash); - IPSEC_WAIT(); - + sc->family = 0; /* * Make sure that ipsec_if_input() will not do access * to softc's policies. */ - sc->family = 0; IPSEC_WAIT(); key_unregister_ifnet(sc->sp, IPSEC_SPCOUNT); diff --git a/freebsd/sys/net/if_lagg.c b/freebsd/sys/net/if_lagg.c index 632ea744..85099115 100644 --- a/freebsd/sys/net/if_lagg.c +++ b/freebsd/sys/net/if_lagg.c @@ -635,11 +635,18 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) { struct lagg_softc *sc_ptr; struct lagg_port *lp, *tlp; - int error, i; + struct ifreq ifr; + int error, i, oldmtu; uint64_t *pval; LAGG_XLOCK_ASSERT(sc); + if (sc->sc_ifp == ifp) { + if_printf(sc->sc_ifp, + "cannot add a lagg to itself as a port\n"); + return (EINVAL); + } + /* Limit the maximal number of lagg ports */ if (sc->sc_count >= LAGG_MAX_PORTS) return (ENOSPC); @@ -658,12 +665,25 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) return (EPROTONOSUPPORT); /* Allow the first Ethernet member to define the MTU */ - if (CK_SLIST_EMPTY(&sc->sc_ports)) + oldmtu = -1; + if (CK_SLIST_EMPTY(&sc->sc_ports)) { sc->sc_ifp->if_mtu = ifp->if_mtu; - else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { - if_printf(sc->sc_ifp, "invalid MTU for %s\n", - ifp->if_xname); - return (EINVAL); + } else if (sc->sc_ifp->if_mtu != ifp->if_mtu) { + if (ifp->if_ioctl == NULL) { + if_printf(sc->sc_ifp, "cannot change MTU for %s\n", + ifp->if_xname); + return (EINVAL); + } + oldmtu = ifp->if_mtu; + strlcpy(ifr.ifr_name, ifp->if_xname, sizeof(ifr.ifr_name)); + ifr.ifr_mtu = sc->sc_ifp->if_mtu; + error = (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); + if (error != 0) { + if_printf(sc->sc_ifp, "invalid MTU for %s\n", + ifp->if_xname); + return (error); + } + ifr.ifr_mtu = oldmtu; } lp = malloc(sizeof(struct lagg_port), M_DEVBUF, M_WAITOK|M_ZERO); @@ -675,6 +695,9 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if (ifp == sc_ptr->sc_ifp) { LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); + if (oldmtu != -1) + (*ifp->if_ioctl)(ifp, SIOCSIFMTU, + (caddr_t)&ifr); return (EINVAL); /* XXX disable stacking for the moment, its untested */ #ifdef LAGG_PORT_STACKING @@ -683,6 +706,9 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) LAGG_MAX_STACKING) { LAGG_LIST_UNLOCK(); free(lp, M_DEVBUF); + if (oldmtu != -1) + (*ifp->if_ioctl)(ifp, SIOCSIFMTU, + (caddr_t)&ifr); return (E2BIG); } #endif @@ -748,6 +774,8 @@ lagg_port_create(struct lagg_softc *sc, struct ifnet *ifp) if ((error = lagg_proto_addport(sc, lp)) != 0) { /* Remove the port, without calling pr_delport. */ lagg_port_destroy(lp, 0); + if (oldmtu != -1) + (*ifp->if_ioctl)(ifp, SIOCSIFMTU, (caddr_t)&ifr); return (error); } @@ -1466,8 +1494,31 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) break; case SIOCSIFMTU: - /* Do not allow the MTU to be directly changed */ - error = EINVAL; + LAGG_XLOCK(sc); + CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { + if (lp->lp_ioctl != NULL) + error = (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); + else + error = EINVAL; + if (error != 0) { + if_printf(ifp, + "failed to change MTU to %d on port %s, " + "reverting all ports to original MTU (%d)\n", + ifr->ifr_mtu, lp->lp_ifp->if_xname, ifp->if_mtu); + break; + } + } + if (error == 0) { + ifp->if_mtu = ifr->ifr_mtu; + } else { + /* set every port back to the original MTU */ + ifr->ifr_mtu = ifp->if_mtu; + CK_SLIST_FOREACH(lp, &sc->sc_ports, lp_entries) { + if (lp->lp_ioctl != NULL) + (*lp->lp_ioctl)(lp->lp_ifp, cmd, data); + } + } + LAGG_XUNLOCK(sc); break; default: diff --git a/freebsd/sys/net/if_stf.c b/freebsd/sys/net/if_stf.c index 1102a62d..3ba9f8c0 100644 --- a/freebsd/sys/net/if_stf.c +++ b/freebsd/sys/net/if_stf.c @@ -373,6 +373,7 @@ stf_encapcheck(const struct mbuf *m, int off, int proto, void *arg) static int stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask) { + struct rm_priotracker in_ifa_tracker; struct ifaddr *ia; struct in_ifaddr *ia4; struct in6_ifaddr *ia6; @@ -388,9 +389,11 @@ stf_getsrcifa6(struct ifnet *ifp, struct in6_addr *addr, struct in6_addr *mask) continue; bcopy(GET_V4(&sin6->sin6_addr), &in, sizeof(in)); + IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia4, INADDR_HASH(in.s_addr), ia_hash) if (ia4->ia_addr.sin_addr.s_addr == in.s_addr) break; + IN_IFADDR_RUNLOCK(&in_ifa_tracker); if (ia4 == NULL) continue; diff --git a/freebsd/sys/net/if_var.h b/freebsd/sys/net/if_var.h index 6504837b..d23928e5 100644 --- a/freebsd/sys/net/if_var.h +++ b/freebsd/sys/net/if_var.h @@ -390,8 +390,6 @@ struct ifnet { struct netdump_methods *if_netdump_methods; #endif /* __rtems__ */ struct epoch_context if_epoch_ctx; - struct epoch_tracker if_addr_et; - struct epoch_tracker if_maddr_et; #ifndef __rtems__ /* diff --git a/freebsd/sys/net/pfvar.h b/freebsd/sys/net/pfvar.h index 5e80b665..2924c06d 100644 --- a/freebsd/sys/net/pfvar.h +++ b/freebsd/sys/net/pfvar.h @@ -824,13 +824,21 @@ typedef void pfsync_update_state_t(struct pf_state *); typedef void pfsync_delete_state_t(struct pf_state *); typedef void pfsync_clear_states_t(u_int32_t, const char *); typedef int pfsync_defer_t(struct pf_state *, struct mbuf *); - -extern pfsync_state_import_t *pfsync_state_import_ptr; -extern pfsync_insert_state_t *pfsync_insert_state_ptr; -extern pfsync_update_state_t *pfsync_update_state_ptr; -extern pfsync_delete_state_t *pfsync_delete_state_ptr; -extern pfsync_clear_states_t *pfsync_clear_states_ptr; -extern pfsync_defer_t *pfsync_defer_ptr; +typedef void pfsync_detach_ifnet_t(struct ifnet *); + +VNET_DECLARE(pfsync_state_import_t *, pfsync_state_import_ptr); +#define V_pfsync_state_import_ptr VNET(pfsync_state_import_ptr) +VNET_DECLARE(pfsync_insert_state_t *, pfsync_insert_state_ptr); +#define V_pfsync_insert_state_ptr VNET(pfsync_insert_state_ptr) +VNET_DECLARE(pfsync_update_state_t *, pfsync_update_state_ptr); +#define V_pfsync_update_state_ptr VNET(pfsync_update_state_ptr) +VNET_DECLARE(pfsync_delete_state_t *, pfsync_delete_state_ptr); +#define V_pfsync_delete_state_ptr VNET(pfsync_delete_state_ptr) +VNET_DECLARE(pfsync_clear_states_t *, pfsync_clear_states_ptr); +#define V_pfsync_clear_states_ptr VNET(pfsync_clear_states_ptr) +VNET_DECLARE(pfsync_defer_t *, pfsync_defer_ptr); +#define V_pfsync_defer_ptr VNET(pfsync_defer_ptr) +extern pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr; void pfsync_state_export(struct pfsync_state *, struct pf_state *); @@ -1205,6 +1213,19 @@ struct pf_divert { #define PFR_KENTRY_HIWAT 200000 /* Number of table entries */ /* + * Limit the length of the fragment queue traversal. Remember + * search entry points based on the fragment offset. + */ +#define PF_FRAG_ENTRY_POINTS 16 + +/* + * The number of entries in the fragment queue must be limited + * to avoid DoS by linear seaching. Instead of a global limit, + * use a limit per entry point. For large packets these sum up. + */ +#define PF_FRAG_ENTRY_LIMIT 64 + +/* * ioctl parameter structures */ diff --git a/freebsd/sys/netinet/icmp6.h b/freebsd/sys/netinet/icmp6.h index 310684b8..d4da06d0 100644 --- a/freebsd/sys/netinet/icmp6.h +++ b/freebsd/sys/netinet/icmp6.h @@ -244,6 +244,10 @@ struct nd_router_advert { /* router advertisement */ #define ND_RA_FLAG_RTPREF_LOW 0x18 /* 00011000 */ #define ND_RA_FLAG_RTPREF_RSV 0x10 /* 00010000 */ +#ifdef EXPERIMENTAL +#define ND_RA_FLAG_IPV6_ONLY 0x02 /* draft-ietf-6man-ipv6only-flag */ +#endif + #define nd_ra_router_lifetime nd_ra_hdr.icmp6_data16[1] struct nd_neighbor_solicit { /* neighbor solicitation */ diff --git a/freebsd/sys/netinet/in_gif.c b/freebsd/sys/netinet/in_gif.c index b8732a33..be0303d0 100644 --- a/freebsd/sys/netinet/in_gif.c +++ b/freebsd/sys/netinet/in_gif.c @@ -150,7 +150,8 @@ in_gif_srcaddr(void *arg __unused, const struct sockaddr *sa, const struct sockaddr_in *sin; struct gif_softc *sc; - if (V_ipv4_srchashtbl == NULL) + /* Check that VNET is ready */ + if (V_ipv4_hashtbl == NULL) return; MPASS(in_epoch(net_epoch_preempt)); @@ -459,6 +460,8 @@ in_gif_uninit(void) } if (V_ipv4_hashtbl != NULL) { gif_hashdestroy(V_ipv4_hashtbl); + V_ipv4_hashtbl = NULL; + GIF_WAIT(); gif_hashdestroy(V_ipv4_srchashtbl); } } diff --git a/freebsd/sys/netinet/in_mcast.c b/freebsd/sys/netinet/in_mcast.c index e0fd4c37..3b1d57f8 100644 --- a/freebsd/sys/netinet/in_mcast.c +++ b/freebsd/sys/netinet/in_mcast.c @@ -1446,6 +1446,7 @@ static int inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; + struct rm_priotracker in_ifa_tracker; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in_mfilter *imf; @@ -1483,9 +1484,11 @@ inp_block_unblock_source(struct inpcb *inp, struct sockopt *sopt) ssa->sin.sin_len = sizeof(struct sockaddr_in); ssa->sin.sin_addr = mreqs.imr_sourceaddr; - if (!in_nullhost(mreqs.imr_interface)) + if (!in_nullhost(mreqs.imr_interface)) { + IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(mreqs.imr_interface, ifp); - + IN_IFADDR_RUNLOCK(&in_ifa_tracker); + } if (sopt->sopt_name == IP_BLOCK_SOURCE) doblock = 1; @@ -1971,7 +1974,6 @@ inp_getmoptions(struct inpcb *inp, struct sockopt *sopt) * * Returns NULL if no ifp could be found. * - * SMPng: TODO: Acquire the appropriate locks for INADDR_TO_IFP. * FUTURE: Implement IPv4 source-address selection. */ static struct ifnet * @@ -1989,7 +1991,9 @@ inp_lookup_mcast_ifp(const struct inpcb *inp, ifp = NULL; if (!in_nullhost(ina)) { + IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(ina, ifp); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); } else { fibnum = inp ? inp->inp_inc.inc_fibnum : 0; if (fib4_lookup_nh_basic(fibnum, gsin->sin_addr, 0, 0, &nh4)==0) @@ -2334,6 +2338,7 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) { struct group_source_req gsr; struct ip_mreq_source mreqs; + struct rm_priotracker in_ifa_tracker; sockunion_t *gsa, *ssa; struct ifnet *ifp; struct in_mfilter *imf; @@ -2392,9 +2397,11 @@ inp_leave_group(struct inpcb *inp, struct sockopt *sopt) * XXX NOTE WELL: The RFC 3678 API is preferred because * using an IPv4 address as a key is racy. */ - if (!in_nullhost(mreqs.imr_interface)) + if (!in_nullhost(mreqs.imr_interface)) { + IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(mreqs.imr_interface, ifp); - + IN_IFADDR_RUNLOCK(&in_ifa_tracker); + } CTR3(KTR_IGMPV3, "%s: imr_interface = 0x%08x, ifp = %p", __func__, ntohl(mreqs.imr_interface.s_addr), ifp); @@ -2562,6 +2569,7 @@ out_inp_locked: static int inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) { + struct rm_priotracker in_ifa_tracker; struct in_addr addr; struct ip_mreqn mreqn; struct ifnet *ifp; @@ -2600,7 +2608,9 @@ inp_set_multicast_if(struct inpcb *inp, struct sockopt *sopt) if (in_nullhost(addr)) { ifp = NULL; } else { + IN_IFADDR_RLOCK(&in_ifa_tracker); INADDR_TO_IFP(addr, ifp); + IN_IFADDR_RUNLOCK(&in_ifa_tracker); if (ifp == NULL) return (EADDRNOTAVAIL); } diff --git a/freebsd/sys/netinet/in_pcb.c b/freebsd/sys/netinet/in_pcb.c index 5ba918fa..6bf43464 100644 --- a/freebsd/sys/netinet/in_pcb.c +++ b/freebsd/sys/netinet/in_pcb.c @@ -272,7 +272,7 @@ in_pcblbgroup_resize(struct inpcblbgrouphead *hdr, grp = in_pcblbgroup_alloc(hdr, old_grp->il_vflag, old_grp->il_lport, &old_grp->il_dependladdr, size); - if (!grp) + if (grp == NULL) return (NULL); KASSERT(old_grp->il_inpcnt < grp->il_inpsiz, @@ -294,21 +294,20 @@ static void in_pcblbgroup_reorder(struct inpcblbgrouphead *hdr, struct inpcblbgroup **grpp, int i) { - struct inpcblbgroup *grp = *grpp; + struct inpcblbgroup *grp, *new_grp; + grp = *grpp; for (; i + 1 < grp->il_inpcnt; ++i) grp->il_inp[i] = grp->il_inp[i + 1]; grp->il_inpcnt--; if (grp->il_inpsiz > INPCBLBGROUP_SIZMIN && - grp->il_inpcnt <= (grp->il_inpsiz / 4)) { + grp->il_inpcnt <= grp->il_inpsiz / 4) { /* Shrink this group. */ - struct inpcblbgroup *new_grp = - in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2); - if (new_grp) + new_grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz / 2); + if (new_grp != NULL) *grpp = new_grp; } - return; } /* @@ -322,31 +321,17 @@ in_pcbinslbgrouphash(struct inpcb *inp) struct inpcbinfo *pcbinfo; struct inpcblbgrouphead *hdr; struct inpcblbgroup *grp; - uint16_t hashmask, lport; - uint32_t group_index; - struct ucred *cred; + uint32_t idx; pcbinfo = inp->inp_pcbinfo; INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); - if (pcbinfo->ipi_lbgrouphashbase == NULL) - return (0); - - hashmask = pcbinfo->ipi_lbgrouphashmask; - lport = inp->inp_lport; - group_index = INP_PCBLBGROUP_PORTHASH(lport, hashmask); - hdr = &pcbinfo->ipi_lbgrouphashbase[group_index]; - /* * Don't allow jailed socket to join local group. */ - if (inp->inp_socket != NULL) - cred = inp->inp_socket->so_cred; - else - cred = NULL; - if (cred != NULL && jailed(cred)) + if (inp->inp_socket != NULL && jailed(inp->inp_socket->so_cred)) return (0); #ifdef INET6 @@ -360,24 +345,23 @@ in_pcbinslbgrouphash(struct inpcb *inp) } #endif - hdr = &pcbinfo->ipi_lbgrouphashbase[ - INP_PCBLBGROUP_PORTHASH(inp->inp_lport, - pcbinfo->ipi_lbgrouphashmask)]; + idx = INP_PCBLBGROUP_PORTHASH(inp->inp_lport, + pcbinfo->ipi_lbgrouphashmask); + hdr = &pcbinfo->ipi_lbgrouphashbase[idx]; CK_LIST_FOREACH(grp, hdr, il_list) { if (grp->il_vflag == inp->inp_vflag && grp->il_lport == inp->inp_lport && memcmp(&grp->il_dependladdr, - &inp->inp_inc.inc_ie.ie_dependladdr, - sizeof(grp->il_dependladdr)) == 0) { + &inp->inp_inc.inc_ie.ie_dependladdr, + sizeof(grp->il_dependladdr)) == 0) break; - } } if (grp == NULL) { /* Create new load balance group. */ grp = in_pcblbgroup_alloc(hdr, inp->inp_vflag, inp->inp_lport, &inp->inp_inc.inc_ie.ie_dependladdr, INPCBLBGROUP_SIZMIN); - if (!grp) + if (grp == NULL) return (ENOBUFS); } else if (grp->il_inpcnt == grp->il_inpsiz) { if (grp->il_inpsiz >= INPCBLBGROUP_SIZMAX) { @@ -389,13 +373,13 @@ in_pcbinslbgrouphash(struct inpcb *inp) /* Expand this local group. */ grp = in_pcblbgroup_resize(hdr, grp, grp->il_inpsiz * 2); - if (!grp) + if (grp == NULL) return (ENOBUFS); } KASSERT(grp->il_inpcnt < grp->il_inpsiz, - ("invalid local group size %d and count %d", - grp->il_inpsiz, grp->il_inpcnt)); + ("invalid local group size %d and count %d", grp->il_inpsiz, + grp->il_inpcnt)); grp->il_inp[grp->il_inpcnt] = inp; grp->il_inpcnt++; @@ -418,9 +402,6 @@ in_pcbremlbgrouphash(struct inpcb *inp) INP_WLOCK_ASSERT(inp); INP_HASH_WLOCK_ASSERT(pcbinfo); - if (pcbinfo->ipi_lbgrouphashbase == NULL) - return; - hdr = &pcbinfo->ipi_lbgrouphashbase[ INP_PCBLBGROUP_PORTHASH(inp->inp_lport, pcbinfo->ipi_lbgrouphashmask)]; @@ -2301,13 +2282,11 @@ in_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in_addr faddr, /* * Then look in lb group (for wildcard match). */ - if (pcbinfo->ipi_lbgrouphashbase != NULL && - (lookupflags & INPLOOKUP_WILDCARD)) { + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = in_pcblookup_lbgroup(pcbinfo, &laddr, lport, &faddr, fport, lookupflags); - if (inp != NULL) { + if (inp != NULL) return (inp); - } } /* diff --git a/freebsd/sys/netinet/ip_carp.c b/freebsd/sys/netinet/ip_carp.c index 8f7f6edf..1fb208a2 100644 --- a/freebsd/sys/netinet/ip_carp.c +++ b/freebsd/sys/netinet/ip_carp.c @@ -2178,6 +2178,21 @@ static struct protosw in6_carp_protosw = { }; #endif +#ifdef VIMAGE +#if defined(__i386__) +/* + * XXX This is a hack to work around an absolute relocation outside + * set_vnet by one (on the stop symbol) for carpstats. Add a dummy variable + * to the end of the file in the hope that the linker will just keep the + * order (as it seems to do at the moment). It is understood to be fragile. + * See PR 230857 for a longer discussion of the problem and the referenced + * review for possible alternate solutions. Each is a hack; we just need + * the least intrusive one for the next release. + */ +VNET_DEFINE(char, carp_zzz) = 0xde; +#endif +#endif + static void carp_mod_cleanup(void) { diff --git a/freebsd/sys/netinet/ip_gre.c b/freebsd/sys/netinet/ip_gre.c index 6a88f37f..1758bfff 100644 --- a/freebsd/sys/netinet/ip_gre.c +++ b/freebsd/sys/netinet/ip_gre.c @@ -169,7 +169,8 @@ in_gre_srcaddr(void *arg __unused, const struct sockaddr *sa, const struct sockaddr_in *sin; struct gre_softc *sc; - if (V_ipv4_srchashtbl == NULL) + /* Check that VNET is ready */ + if (V_ipv4_hashtbl == NULL) return; MPASS(in_epoch(net_epoch_preempt)); @@ -352,6 +353,8 @@ in_gre_uninit(void) } if (V_ipv4_hashtbl != NULL) { gre_hashdestroy(V_ipv4_hashtbl); + V_ipv4_hashtbl = NULL; + GRE_WAIT(); gre_hashdestroy(V_ipv4_srchashtbl); } } diff --git a/freebsd/sys/netinet/ip_icmp.c b/freebsd/sys/netinet/ip_icmp.c index 2f8b6fd1..7e75f3d9 100644 --- a/freebsd/sys/netinet/ip_icmp.c +++ b/freebsd/sys/netinet/ip_icmp.c @@ -266,6 +266,7 @@ icmp_error(struct mbuf *n, int type, int code, uint32_t dest, int mtu) if (n->m_len < oiphlen + tcphlen && (n = m_pullup(n, oiphlen + tcphlen)) == NULL) goto freeit; + oip = mtod(n, struct ip *); icmpelen = max(tcphlen, min(V_icmp_quotelen, ntohs(oip->ip_len) - oiphlen)); } else if (oip->ip_p == IPPROTO_SCTP) { @@ -321,7 +322,8 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - #endif icmplen = min(icmplen, M_TRAILINGSPACE(m) - sizeof(struct ip) - ICMP_MINLEN); - m_align(m, ICMP_MINLEN + icmplen); + m_align(m, sizeof(struct ip) + ICMP_MINLEN + icmplen); + m->m_data += sizeof(struct ip); m->m_len = ICMP_MINLEN + icmplen; /* XXX MRT make the outgoing packet use the same FIB @@ -363,6 +365,8 @@ stdreply: icmpelen = max(8, min(V_icmp_quotelen, ntohs(oip->ip_len) - * reply should bypass as well. */ m->m_flags |= n->m_flags & M_SKIP_FIREWALL; + KASSERT(M_LEADINGSPACE(m) >= sizeof(struct ip), + ("insufficient space for ip header")); m->m_data -= sizeof(struct ip); m->m_len += sizeof(struct ip); m->m_pkthdr.len = m->m_len; diff --git a/freebsd/sys/netinet/ip_input.c b/freebsd/sys/netinet/ip_input.c index 2852b52e..136a774f 100644 --- a/freebsd/sys/netinet/ip_input.c +++ b/freebsd/sys/netinet/ip_input.c @@ -450,6 +450,7 @@ ip_direct_input(struct mbuf *m) void ip_input(struct mbuf *m) { + struct rm_priotracker in_ifa_tracker; struct ip *ip = NULL; struct in_ifaddr *ia = NULL; struct ifaddr *ifa; @@ -683,7 +684,7 @@ passin: /* * Check for exact addresses in the hash bucket. */ - /* IN_IFADDR_RLOCK(); */ + IN_IFADDR_RLOCK(&in_ifa_tracker); LIST_FOREACH(ia, INADDR_HASH(ip->ip_dst.s_addr), ia_hash) { /* * If the address matches, verify that the packet @@ -695,11 +696,11 @@ passin: counter_u64_add(ia->ia_ifa.ifa_ipackets, 1); counter_u64_add(ia->ia_ifa.ifa_ibytes, m->m_pkthdr.len); - /* IN_IFADDR_RUNLOCK(); */ + IN_IFADDR_RUNLOCK(&in_ifa_tracker); goto ours; } } - /* IN_IFADDR_RUNLOCK(); */ + IN_IFADDR_RUNLOCK(&in_ifa_tracker); /* * Check for broadcast addresses. diff --git a/freebsd/sys/netinet/ip_reass.c b/freebsd/sys/netinet/ip_reass.c index 95603390..8bc5b53b 100644 --- a/freebsd/sys/netinet/ip_reass.c +++ b/freebsd/sys/netinet/ip_reass.c @@ -230,6 +230,16 @@ ip_reass(struct mbuf *m) ip->ip_off = htons(ntohs(ip->ip_off) << 3); /* + * Make sure the fragment lies within a packet of valid size. + */ + if (ntohs(ip->ip_len) + ntohs(ip->ip_off) > IP_MAXPACKET) { + IPSTAT_INC(ips_toolong); + IPSTAT_INC(ips_fragdropped); + m_freem(m); + return (NULL); + } + + /* * Attempt reassembly; if it succeeds, proceed. * ip_reass() will return a different mbuf. */ diff --git a/freebsd/sys/netinet/sctp_output.c b/freebsd/sys/netinet/sctp_output.c index 9493882c..b01ec41f 100644 --- a/freebsd/sys/netinet/sctp_output.c +++ b/freebsd/sys/netinet/sctp_output.c @@ -3735,6 +3735,7 @@ sctp_process_cmsgs_for_init(struct sctp_tcb *stcb, struct mbuf *control, int *er return (0); } +#if defined(INET) || defined(INET6) static struct sctp_tcb * sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, uint16_t port, @@ -3824,6 +3825,7 @@ sctp_findassociation_cmsgs(struct sctp_inpcb **inp_p, } return (NULL); } +#endif static struct mbuf * sctp_add_cookie(struct mbuf *init, int init_offset, @@ -12701,9 +12703,11 @@ sctp_lower_sosend(struct socket *so, SCTP_INP_WUNLOCK(inp); /* With the lock applied look again */ stcb = sctp_findassociation_ep_addr(&t_inp, addr, &net, NULL, NULL); +#if defined(INET) || defined(INET6) if ((stcb == NULL) && (control != NULL) && (port > 0)) { stcb = sctp_findassociation_cmsgs(&t_inp, port, control, &net, &error); } +#endif if (stcb == NULL) { SCTP_INP_WLOCK(inp); SCTP_INP_DECR_REF(inp); diff --git a/freebsd/sys/netinet/sctp_sysctl.h b/freebsd/sys/netinet/sctp_sysctl.h index 8187ae9e..219b9532 100644 --- a/freebsd/sys/netinet/sctp_sysctl.h +++ b/freebsd/sys/netinet/sctp_sysctl.h @@ -212,7 +212,7 @@ struct sctp_sysctl { #define SCTPCTL_MAXBURST_DEFAULT SCTP_DEF_MAX_BURST /* fr_maxburst: Default max burst for sctp endpoints when fast retransmitting */ -#define SCTPCTL_FRMAXBURST_DESC "Default fr max burst for sctp endpoints" +#define SCTPCTL_FRMAXBURST_DESC "Default max burst for SCTP endpoints when fast retransmitting" #define SCTPCTL_FRMAXBURST_MIN 0 #define SCTPCTL_FRMAXBURST_MAX 0xFFFFFFFF #define SCTPCTL_FRMAXBURST_DEFAULT SCTP_DEF_FRMAX_BURST @@ -243,7 +243,7 @@ struct sctp_sysctl { #define SCTPCTL_MIN_SPLIT_POINT_DEFAULT SCTP_DEFAULT_SPLIT_POINT_MIN /* chunkscale: Tunable for Scaling of number of chunks and messages */ -#define SCTPCTL_CHUNKSCALE_DESC "Tunable for Scaling of number of chunks and messages" +#define SCTPCTL_CHUNKSCALE_DESC "Tunable for scaling of number of chunks and messages" #define SCTPCTL_CHUNKSCALE_MIN 1 #define SCTPCTL_CHUNKSCALE_MAX 0xFFFFFFFF #define SCTPCTL_CHUNKSCALE_DEFAULT SCTP_CHUNKQUEUE_SCALE @@ -327,7 +327,7 @@ struct sctp_sysctl { #define SCTPCTL_VALID_COOKIE_LIFE_DEFAULT SCTP_DEFAULT_COOKIE_LIFE /* init_rtx_max: Default maximum number of retransmission for INIT chunks */ -#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmission for INIT chunks" +#define SCTPCTL_INIT_RTX_MAX_DESC "Default maximum number of retransmissions for INIT chunks" #define SCTPCTL_INIT_RTX_MAX_MIN 0 #define SCTPCTL_INIT_RTX_MAX_MAX 0xFFFFFFFF #define SCTPCTL_INIT_RTX_MAX_DEFAULT SCTP_DEF_MAX_INIT @@ -380,8 +380,8 @@ struct sctp_sysctl { #define SCTPCTL_CMT_USE_DAC_MAX 1 #define SCTPCTL_CMT_USE_DAC_DEFAULT 0 -/* cwnd_maxburst: Use a CWND adjusting maxburst */ -#define SCTPCTL_CWND_MAXBURST_DESC "Use a CWND adjusting maxburst" +/* cwnd_maxburst: Use a CWND adjusting to implement maxburst */ +#define SCTPCTL_CWND_MAXBURST_DESC "Adjust congestion control window to limit maximum burst when sending" #define SCTPCTL_CWND_MAXBURST_MIN 0 #define SCTPCTL_CWND_MAXBURST_MAX 1 #define SCTPCTL_CWND_MAXBURST_DEFAULT 1 @@ -417,7 +417,7 @@ struct sctp_sysctl { #define SCTPCTL_HB_MAX_BURST_DEFAULT SCTP_DEF_HBMAX_BURST /* abort_at_limit: When one-2-one hits qlimit abort */ -#define SCTPCTL_ABORT_AT_LIMIT_DESC "When one-2-one hits qlimit abort" +#define SCTPCTL_ABORT_AT_LIMIT_DESC "Abort when one-to-one hits qlimit" #define SCTPCTL_ABORT_AT_LIMIT_MIN 0 #define SCTPCTL_ABORT_AT_LIMIT_MAX 1 #define SCTPCTL_ABORT_AT_LIMIT_DEFAULT 0 @@ -429,7 +429,7 @@ struct sctp_sysctl { #define SCTPCTL_MIN_RESIDUAL_DEFAULT 1452 /* max_retran_chunk: max chunk retransmissions */ -#define SCTPCTL_MAX_RETRAN_CHUNK_DESC "Maximum times an unlucky chunk can be retran'd before assoc abort" +#define SCTPCTL_MAX_RETRAN_CHUNK_DESC "Maximum times an unlucky chunk can be retransmitted before assoc abort" #define SCTPCTL_MAX_RETRAN_CHUNK_MIN 0 #define SCTPCTL_MAX_RETRAN_CHUNK_MAX 65535 #define SCTPCTL_MAX_RETRAN_CHUNK_DEFAULT 30 @@ -477,63 +477,63 @@ struct sctp_sysctl { #define SCTPCTL_UDP_TUNNELING_PORT_DEFAULT 0 /* Enable sending of the SACK-IMMEDIATELY bit */ -#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC "Enable sending of the SACK-IMMEDIATELY-bit." +#define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DESC "Enable sending of the SACK-IMMEDIATELY-bit" #define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MIN 0 #define SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX 1 #define SCTPCTL_SACK_IMMEDIATELY_ENABLE_DEFAULT SCTPCTL_SACK_IMMEDIATELY_ENABLE_MAX /* Enable sending of the NAT-FRIENDLY message */ -#define SCTPCTL_NAT_FRIENDLY_INITS_DESC "Enable sending of the nat-friendly SCTP option on INITs." +#define SCTPCTL_NAT_FRIENDLY_INITS_DESC "Enable sending of the nat-friendly SCTP option on INITs" #define SCTPCTL_NAT_FRIENDLY_INITS_MIN 0 #define SCTPCTL_NAT_FRIENDLY_INITS_MAX 1 #define SCTPCTL_NAT_FRIENDLY_INITS_DEFAULT SCTPCTL_NAT_FRIENDLY_INITS_MIN /* Vtag time wait in seconds */ -#define SCTPCTL_TIME_WAIT_DESC "Vtag time wait time in seconds, 0 disables it." +#define SCTPCTL_TIME_WAIT_DESC "Vtag time wait time in seconds, 0 disables it" #define SCTPCTL_TIME_WAIT_MIN 0 #define SCTPCTL_TIME_WAIT_MAX 0xffffffff #define SCTPCTL_TIME_WAIT_DEFAULT SCTP_TIME_WAIT /* Enable Send/Receive buffer splitting */ -#define SCTPCTL_BUFFER_SPLITTING_DESC "Enable send/receive buffer splitting." +#define SCTPCTL_BUFFER_SPLITTING_DESC "Enable send/receive buffer splitting" #define SCTPCTL_BUFFER_SPLITTING_MIN 0 #define SCTPCTL_BUFFER_SPLITTING_MAX 0x3 #define SCTPCTL_BUFFER_SPLITTING_DEFAULT SCTPCTL_BUFFER_SPLITTING_MIN -/* Initial congestion window in MTU */ -#define SCTPCTL_INITIAL_CWND_DESC "Initial congestion window in MTUs" +/* Initial congestion window in MTUs */ +#define SCTPCTL_INITIAL_CWND_DESC "Defines the initial congestion window size in MTUs" #define SCTPCTL_INITIAL_CWND_MIN 0 #define SCTPCTL_INITIAL_CWND_MAX 0xffffffff #define SCTPCTL_INITIAL_CWND_DEFAULT 3 /* rttvar smooth avg for bw calc */ -#define SCTPCTL_RTTVAR_BW_DESC "Shift amount for bw smoothing on rtt calc" +#define SCTPCTL_RTTVAR_BW_DESC "Shift amount DCCC uses for bw smoothing on rtt calc" #define SCTPCTL_RTTVAR_BW_MIN 0 #define SCTPCTL_RTTVAR_BW_MAX 32 #define SCTPCTL_RTTVAR_BW_DEFAULT 4 /* rttvar smooth avg for bw calc */ -#define SCTPCTL_RTTVAR_RTT_DESC "Shift amount for rtt smoothing on rtt calc" +#define SCTPCTL_RTTVAR_RTT_DESC "Shift amount DCCC uses for rtt smoothing on rtt calc" #define SCTPCTL_RTTVAR_RTT_MIN 0 #define SCTPCTL_RTTVAR_RTT_MAX 32 #define SCTPCTL_RTTVAR_RTT_DEFAULT 5 -#define SCTPCTL_RTTVAR_EQRET_DESC "What to return when rtt and bw are unchanged" +#define SCTPCTL_RTTVAR_EQRET_DESC "Whether DCCC increases cwnd when the rtt and bw are unchanged" #define SCTPCTL_RTTVAR_EQRET_MIN 0 #define SCTPCTL_RTTVAR_EQRET_MAX 1 #define SCTPCTL_RTTVAR_EQRET_DEFAULT 0 -#define SCTPCTL_RTTVAR_STEADYS_DESC "How many the sames it takes to try step down of cwnd" +#define SCTPCTL_RTTVAR_STEADYS_DESC "Number of identical bw measurements DCCC takes to try step down of cwnd" #define SCTPCTL_RTTVAR_STEADYS_MIN 0 #define SCTPCTL_RTTVAR_STEADYS_MAX 0xFFFF #define SCTPCTL_RTTVAR_STEADYS_DEFAULT 20 /* 0 means disable feature */ -#define SCTPCTL_RTTVAR_DCCCECN_DESC "Enable for RTCC CC datacenter ECN" +#define SCTPCTL_RTTVAR_DCCCECN_DESC "Enable ECN for DCCC." #define SCTPCTL_RTTVAR_DCCCECN_MIN 0 #define SCTPCTL_RTTVAR_DCCCECN_MAX 1 #define SCTPCTL_RTTVAR_DCCCECN_DEFAULT 1 /* 0 means disable feature */ -#define SCTPCTL_BLACKHOLE_DESC "Enable SCTP blackholing. See blackhole(4) for more details." +#define SCTPCTL_BLACKHOLE_DESC "Enable SCTP blackholing, see blackhole(4) for more details" #define SCTPCTL_BLACKHOLE_MIN 0 #define SCTPCTL_BLACKHOLE_MAX 2 #define SCTPCTL_BLACKHOLE_DEFAULT SCTPCTL_BLACKHOLE_MIN @@ -553,7 +553,7 @@ struct sctp_sysctl { #if defined(__APPLE__) || defined(SCTP_SO_LOCK_TESTING) -#define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP." +#define SCTPCTL_OUTPUT_UNLOCKED_DESC "Unlock socket when sending packets down to IP" #define SCTPCTL_OUTPUT_UNLOCKED_MIN 0 #define SCTPCTL_OUTPUT_UNLOCKED_MAX 1 #define SCTPCTL_OUTPUT_UNLOCKED_DEFAULT SCTPCTL_OUTPUT_UNLOCKED_MIN diff --git a/freebsd/sys/netinet/tcp_syncache.c b/freebsd/sys/netinet/tcp_syncache.c index a913f5b6..bfbf9f42 100644 --- a/freebsd/sys/netinet/tcp_syncache.c +++ b/freebsd/sys/netinet/tcp_syncache.c @@ -583,15 +583,28 @@ syncache_chkrst(struct in_conninfo *inc, struct tcphdr *th, struct mbuf *m) /* * If the RST bit is set, check the sequence number to see * if this is a valid reset segment. + * * RFC 793 page 37: * In all states except SYN-SENT, all reset (RST) segments * are validated by checking their SEQ-fields. A reset is * valid if its sequence number is in the window. * - * The sequence number in the reset segment is normally an - * echo of our outgoing acknowlegement numbers, but some hosts - * send a reset with the sequence number at the rightmost edge - * of our receive window, and we have to handle this case. + * RFC 793 page 69: + * There are four cases for the acceptability test for an incoming + * segment: + * + * Segment Receive Test + * Length Window + * ------- ------- ------------------------------------------- + * 0 0 SEG.SEQ = RCV.NXT + * 0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * >0 0 not acceptable + * >0 >0 RCV.NXT =< SEG.SEQ < RCV.NXT+RCV.WND + * or RCV.NXT =< SEG.SEQ+SEG.LEN-1 < RCV.NXT+RCV.WND + * + * Note that when receiving a SYN segment in the LISTEN state, + * IRS is set to SEG.SEQ and RCV.NXT is set to SEG.SEQ+1, as + * described in RFC 793, page 66. */ if ((SEQ_GEQ(th->th_seq, sc->sc_irs + 1) && SEQ_LT(th->th_seq, sc->sc_irs + 1 + sc->sc_wnd)) || diff --git a/freebsd/sys/netinet6/in6_gif.c b/freebsd/sys/netinet6/in6_gif.c index 66c2cfb4..d7396bb1 100644 --- a/freebsd/sys/netinet6/in6_gif.c +++ b/freebsd/sys/netinet6/in6_gif.c @@ -155,7 +155,8 @@ in6_gif_srcaddr(void *arg __unused, const struct sockaddr *sa, int event) const struct sockaddr_in6 *sin; struct gif_softc *sc; - if (V_ipv6_srchashtbl == NULL) + /* Check that VNET is ready */ + if (V_ipv6_hashtbl == NULL) return; MPASS(in_epoch(net_epoch_preempt)); @@ -482,6 +483,8 @@ in6_gif_uninit(void) } if (V_ipv6_hashtbl != NULL) { gif_hashdestroy(V_ipv6_hashtbl); + V_ipv6_hashtbl = NULL; + GIF_WAIT(); gif_hashdestroy(V_ipv6_srchashtbl); } } diff --git a/freebsd/sys/netinet6/in6_ifattach.c b/freebsd/sys/netinet6/in6_ifattach.c index 1cab31d1..6af4b557 100644 --- a/freebsd/sys/netinet6/in6_ifattach.c +++ b/freebsd/sys/netinet6/in6_ifattach.c @@ -487,9 +487,16 @@ in6_ifattach_linklocal(struct ifnet *ifp, struct ifnet *altifp) return (-1); } - ia = in6ifa_ifpforlinklocal(ifp, 0); /* ia must not be NULL */ - KASSERT(ia != NULL, ("%s: ia == NULL, ifp=%p", __func__, ifp)); - + ia = in6ifa_ifpforlinklocal(ifp, 0); + if (ia == NULL) { + /* + * Another thread removed the address that we just added. + * This should be rare, but it happens. + */ + nd6log((LOG_NOTICE, "%s: %s: new link-local address " + "disappeared\n", __func__, if_name(ifp))); + return (-1); + } ifa_free(&ia->ia_ifa); /* diff --git a/freebsd/sys/netinet6/in6_pcb.c b/freebsd/sys/netinet6/in6_pcb.c index 53102764..f5b22db7 100644 --- a/freebsd/sys/netinet6/in6_pcb.c +++ b/freebsd/sys/netinet6/in6_pcb.c @@ -1175,13 +1175,11 @@ in6_pcblookup_hash_locked(struct inpcbinfo *pcbinfo, struct in6_addr *faddr, /* * Then look in lb group (for wildcard match). */ - if (pcbinfo->ipi_lbgrouphashbase != NULL && - (lookupflags & INPLOOKUP_WILDCARD)) { + if ((lookupflags & INPLOOKUP_WILDCARD) != 0) { inp = in6_pcblookup_lbgroup(pcbinfo, laddr, lport, faddr, fport, lookupflags); - if (inp != NULL) { + if (inp != NULL) return (inp); - } } /* diff --git a/freebsd/sys/netinet6/nd6.h b/freebsd/sys/netinet6/nd6.h index cabfeec0..7544d23c 100644 --- a/freebsd/sys/netinet6/nd6.h +++ b/freebsd/sys/netinet6/nd6.h @@ -90,6 +90,9 @@ struct nd_ifinfo { #define ND6_IFF_NO_RADR 0x40 #define ND6_IFF_NO_PREFER_IFACE 0x80 /* XXX: not related to ND. */ #define ND6_IFF_NO_DAD 0x100 +#ifdef EXPERIMENTAL +#define ND6_IFF_IPV6_ONLY 0x200 /* draft-ietf-6man-ipv6only-flag */ +#endif #ifdef _KERNEL #define ND_IFINFO(ifp) \ diff --git a/freebsd/sys/netinet6/nd6_rtr.c b/freebsd/sys/netinet6/nd6_rtr.c index a60e7c66..59868383 100644 --- a/freebsd/sys/netinet6/nd6_rtr.c +++ b/freebsd/sys/netinet6/nd6_rtr.c @@ -206,6 +206,37 @@ nd6_rs_input(struct mbuf *m, int off, int icmp6len) m_freem(m); } +#ifdef EXPERIMENTAL +/* + * An initial update routine for draft-ietf-6man-ipv6only-flag. + * We need to iterate over all default routers for the given + * interface to see whether they are all advertising the "6" + * (IPv6-Only) flag. If they do set, otherwise unset, the + * interface flag we later use to filter on. + */ +static void +defrtr_ipv6_only_ifp(struct ifnet *ifp) +{ + struct nd_defrouter *dr; + bool ipv6_only; + + ipv6_only = true; + ND6_RLOCK(); + TAILQ_FOREACH(dr, &V_nd_defrouter, dr_entry) + if (dr->ifp == ifp && + (dr->raflags & ND_RA_FLAG_IPV6_ONLY) == 0) + ipv6_only = false; + ND6_RUNLOCK(); + + IF_AFDATA_WLOCK(ifp); + if (ipv6_only) + ND_IFINFO(ifp)->flags |= ND6_IFF_IPV6_ONLY; + else + ND_IFINFO(ifp)->flags &= ~ND6_IFF_IPV6_ONLY; + IF_AFDATA_WUNLOCK(ifp); +} +#endif + /* * Receive Router Advertisement Message. * @@ -321,6 +352,9 @@ nd6_ra_input(struct mbuf *m, int off, int icmp6len) } } dr = defrtrlist_update(&dr0); +#ifdef EXPERIMENTAL + defrtr_ipv6_only_ifp(ifp); +#endif } /* @@ -694,6 +728,10 @@ defrouter_del(struct nd_defrouter *dr) if (ND_IFINFO(dr->ifp)->flags & ND6_IFF_ACCEPT_RTADV) rt6_flush(&dr->rtaddr, dr->ifp); +#ifdef EXPERIMENTAL + defrtr_ipv6_only_ifp(dr->ifp); +#endif + if (dr->installed) { deldr = dr; defrouter_delreq(dr); diff --git a/freebsd/sys/netinet6/raw_ip6.c b/freebsd/sys/netinet6/raw_ip6.c index 9c3d7a61..73d0832a 100644 --- a/freebsd/sys/netinet6/raw_ip6.c +++ b/freebsd/sys/netinet6/raw_ip6.c @@ -189,6 +189,45 @@ rip6_input(struct mbuf **mp, int *offp, int proto) if (!IN6_IS_ADDR_UNSPECIFIED(&in6p->in6p_faddr) && !IN6_ARE_ADDR_EQUAL(&in6p->in6p_faddr, &ip6->ip6_src)) continue; + if (last != NULL) { + struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); + +#if defined(IPSEC) || defined(IPSEC_SUPPORT) + /* + * Check AH/ESP integrity. + */ + if (IPSEC_ENABLED(ipv6)) { + if (n != NULL && + IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { + m_freem(n); + /* Do not inject data into pcb. */ + n = NULL; + } + } +#endif /* IPSEC */ + if (n) { + if (last->inp_flags & INP_CONTROLOPTS || + last->inp_socket->so_options & SO_TIMESTAMP) + ip6_savecontrol(last, n, &opts); + /* strip intermediate headers */ + m_adj(n, *offp); + if (sbappendaddr(&last->inp_socket->so_rcv, + (struct sockaddr *)&fromsa, + n, opts) == 0) { + m_freem(n); + if (opts) + m_freem(opts); + RIP6STAT_INC(rip6s_fullsock); + } else + sorwakeup(last->inp_socket); + opts = NULL; + } + INP_RUNLOCK(last); + last = NULL; + } + INP_RLOCK(in6p); + if (__predict_false(in6p->inp_flags2 & INP_FREED)) + goto skip_2; if (jailed_without_vnet(in6p->inp_cred)) { /* * Allow raw socket in jail to receive multicast; @@ -198,16 +237,14 @@ rip6_input(struct mbuf **mp, int *offp, int proto) if (!IN6_IS_ADDR_MULTICAST(&ip6->ip6_dst) && prison_check_ip6(in6p->inp_cred, &ip6->ip6_dst) != 0) - continue; + goto skip_2; } - INP_RLOCK(in6p); if (in6p->in6p_cksum != -1) { RIP6STAT_INC(rip6s_isum); if (in6_cksum(m, proto, *offp, m->m_pkthdr.len - *offp)) { - INP_RUNLOCK(in6p); RIP6STAT_INC(rip6s_badsum); - continue; + goto skip_2; } } /* @@ -253,46 +290,13 @@ rip6_input(struct mbuf **mp, int *offp, int proto) } if (blocked != MCAST_PASS) { IP6STAT_INC(ip6s_notmember); - INP_RUNLOCK(in6p); - continue; - } - } - if (last != NULL) { - struct mbuf *n = m_copym(m, 0, M_COPYALL, M_NOWAIT); - -#if defined(IPSEC) || defined(IPSEC_SUPPORT) - /* - * Check AH/ESP integrity. - */ - if (IPSEC_ENABLED(ipv6)) { - if (n != NULL && - IPSEC_CHECK_POLICY(ipv6, n, last) != 0) { - m_freem(n); - /* Do not inject data into pcb. */ - n = NULL; - } - } -#endif /* IPSEC */ - if (n) { - if (last->inp_flags & INP_CONTROLOPTS || - last->inp_socket->so_options & SO_TIMESTAMP) - ip6_savecontrol(last, n, &opts); - /* strip intermediate headers */ - m_adj(n, *offp); - if (sbappendaddr(&last->inp_socket->so_rcv, - (struct sockaddr *)&fromsa, - n, opts) == 0) { - m_freem(n); - if (opts) - m_freem(opts); - RIP6STAT_INC(rip6s_fullsock); - } else - sorwakeup(last->inp_socket); - opts = NULL; + goto skip_2; } - INP_RUNLOCK(last); } last = in6p; + continue; +skip_2: + INP_RUNLOCK(in6p); } INP_INFO_RUNLOCK_ET(&V_ripcbinfo, et); #if defined(IPSEC) || defined(IPSEC_SUPPORT) diff --git a/freebsd/sys/netpfil/pf/if_pfsync.c b/freebsd/sys/netpfil/pf/if_pfsync.c index dae091db..9612ac99 100644 --- a/freebsd/sys/netpfil/pf/if_pfsync.c +++ b/freebsd/sys/netpfil/pf/if_pfsync.c @@ -283,6 +283,7 @@ static void pfsync_bulk_status(u_int8_t); static void pfsync_bulk_update(void *); static void pfsync_bulk_fail(void *); +static void pfsync_detach_ifnet(struct ifnet *); #ifdef IPSEC static void pfsync_update_net_tdb(struct pfsync_tdb *); #endif @@ -1364,10 +1365,10 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) sc->sc_maxupdates = pfsyncr.pfsyncr_maxupdates; if (pfsyncr.pfsyncr_defer) { sc->sc_flags |= PFSYNCF_DEFER; - pfsync_defer_ptr = pfsync_defer; + V_pfsync_defer_ptr = pfsync_defer; } else { sc->sc_flags &= ~PFSYNCF_DEFER; - pfsync_defer_ptr = NULL; + V_pfsync_defer_ptr = NULL; } if (sifp == NULL) { @@ -1395,6 +1396,7 @@ pfsyncioctl(struct ifnet *ifp, u_long cmd, caddr_t data) if (error) { if_rele(sifp); free(mship, M_PFSYNC); + PFSYNC_UNLOCK(sc); return (error); } } @@ -2294,6 +2296,29 @@ pfsync_multicast_cleanup(struct pfsync_softc *sc) imo->imo_multicast_ifp = NULL; } +void +pfsync_detach_ifnet(struct ifnet *ifp) +{ + struct pfsync_softc *sc = V_pfsyncif; + + if (sc == NULL) + return; + + PFSYNC_LOCK(sc); + + if (sc->sc_sync_if == ifp) { + /* We don't need mutlicast cleanup here, because the interface + * is going away. We do need to ensure we don't try to do + * cleanup later. + */ + sc->sc_imo.imo_membership = NULL; + sc->sc_imo.imo_multicast_ifp = NULL; + sc->sc_sync_if = NULL; + } + + PFSYNC_UNLOCK(sc); +} + #ifdef INET extern struct domain inetdomain; static struct protosw in_pfsync_protosw = { @@ -2313,12 +2338,12 @@ pfsync_pointers_init() { PF_RULES_WLOCK(); - pfsync_state_import_ptr = pfsync_state_import; - pfsync_insert_state_ptr = pfsync_insert_state; - pfsync_update_state_ptr = pfsync_update_state; - pfsync_delete_state_ptr = pfsync_delete_state; - pfsync_clear_states_ptr = pfsync_clear_states; - pfsync_defer_ptr = pfsync_defer; + V_pfsync_state_import_ptr = pfsync_state_import; + V_pfsync_insert_state_ptr = pfsync_insert_state; + V_pfsync_update_state_ptr = pfsync_update_state; + V_pfsync_delete_state_ptr = pfsync_delete_state; + V_pfsync_clear_states_ptr = pfsync_clear_states; + V_pfsync_defer_ptr = pfsync_defer; PF_RULES_WUNLOCK(); } @@ -2327,12 +2352,12 @@ pfsync_pointers_uninit() { PF_RULES_WLOCK(); - pfsync_state_import_ptr = NULL; - pfsync_insert_state_ptr = NULL; - pfsync_update_state_ptr = NULL; - pfsync_delete_state_ptr = NULL; - pfsync_clear_states_ptr = NULL; - pfsync_defer_ptr = NULL; + V_pfsync_state_import_ptr = NULL; + V_pfsync_insert_state_ptr = NULL; + V_pfsync_update_state_ptr = NULL; + V_pfsync_delete_state_ptr = NULL; + V_pfsync_clear_states_ptr = NULL; + V_pfsync_defer_ptr = NULL; PF_RULES_WUNLOCK(); } @@ -2349,6 +2374,8 @@ vnet_pfsync_init(const void *unused __unused) if_clone_detach(V_pfsync_cloner); log(LOG_INFO, "swi_add() failed in %s\n", __func__); } + + pfsync_pointers_init(); } VNET_SYSINIT(vnet_pfsync_init, SI_SUB_PROTO_FIREWALL, SI_ORDER_ANY, vnet_pfsync_init, NULL); @@ -2357,14 +2384,13 @@ static void vnet_pfsync_uninit(const void *unused __unused) { + pfsync_pointers_uninit(); + if_clone_detach(V_pfsync_cloner); swi_remove(V_pfsync_swi_cookie); } -/* - * Detach after pf is gone; otherwise we might touch pfsync memory - * from within pf after freeing pfsync. - */ -VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_INIT_IF, SI_ORDER_SECOND, + +VNET_SYSUNINIT(vnet_pfsync_uninit, SI_SUB_PROTO_FIREWALL, SI_ORDER_FOURTH, vnet_pfsync_uninit, NULL); static int @@ -2373,6 +2399,8 @@ pfsync_init() #ifdef INET int error; + pfsync_detach_ifnet_ptr = pfsync_detach_ifnet; + error = pf_proto_register(PF_INET, &in_pfsync_protosw); if (error) return (error); @@ -2382,7 +2410,6 @@ pfsync_init() return (error); } #endif - pfsync_pointers_init(); return (0); } @@ -2390,8 +2417,7 @@ pfsync_init() static void pfsync_uninit() { - - pfsync_pointers_uninit(); + pfsync_detach_ifnet_ptr = NULL; #ifdef INET ipproto_unregister(IPPROTO_PFSYNC); @@ -2408,12 +2434,6 @@ pfsync_modevent(module_t mod, int type, void *data) case MOD_LOAD: error = pfsync_init(); break; - case MOD_QUIESCE: - /* - * Module should not be unloaded due to race conditions. - */ - error = EBUSY; - break; case MOD_UNLOAD: pfsync_uninit(); break; diff --git a/freebsd/sys/netpfil/pf/pf.c b/freebsd/sys/netpfil/pf/pf.c index 5fa7a8fe..e115061a 100644 --- a/freebsd/sys/netpfil/pf/pf.c +++ b/freebsd/sys/netpfil/pf/pf.c @@ -1270,8 +1270,8 @@ pf_state_insert(struct pfi_kif *kif, struct pf_state_key *skw, refcount_init(&s->refs, 2); counter_u64_add(V_pf_status.fcounters[FCNT_STATE_INSERT], 1); - if (pfsync_insert_state_ptr != NULL) - pfsync_insert_state_ptr(s); + if (V_pfsync_insert_state_ptr != NULL) + V_pfsync_insert_state_ptr(s); /* Returns locked. */ return (0); @@ -1674,8 +1674,8 @@ pf_unlink_state(struct pf_state *s, u_int flags) LIST_REMOVE(s, entry); pf_src_tree_remove_state(s); - if (pfsync_delete_state_ptr != NULL) - pfsync_delete_state_ptr(s); + if (V_pfsync_delete_state_ptr != NULL) + V_pfsync_delete_state_ptr(s); STATE_DEC_COUNTERS(s); @@ -3441,7 +3441,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (PF_ANEQ(daddr, &nk->addr[pd->didx], AF_INET6)) - PF_ACPY(saddr, &nk->addr[pd->didx], af); + PF_ACPY(daddr, &nk->addr[pd->didx], af); break; #endif /* INET */ } @@ -3594,7 +3594,7 @@ pf_test_rule(struct pf_rule **rm, struct pf_state **sm, int direction, if (*sm != NULL && !((*sm)->state_flags & PFSTATE_NOSYNC) && direction == PF_OUT && - pfsync_defer_ptr != NULL && pfsync_defer_ptr(*sm, m)) + V_pfsync_defer_ptr != NULL && V_pfsync_defer_ptr(*sm, m)) /* * We want the state created, but we dont * want to send this in case a partner @@ -6008,8 +6008,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6037,8 +6037,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6060,8 +6060,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6083,8 +6083,8 @@ pf_test(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb * default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6447,8 +6447,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb action = pf_test_state_tcp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6476,8 +6476,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb } action = pf_test_state_udp(&s, dir, kif, m, off, h, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6506,8 +6506,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb action = pf_test_state_icmp(&s, dir, kif, m, off, h, &pd, &reason); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; @@ -6520,8 +6520,8 @@ pf_test6(int dir, int pflags, struct ifnet *ifp, struct mbuf **m0, struct inpcb default: action = pf_test_state_other(&s, dir, kif, m, &pd); if (action == PF_PASS) { - if (pfsync_update_state_ptr != NULL) - pfsync_update_state_ptr(s); + if (V_pfsync_update_state_ptr != NULL) + V_pfsync_update_state_ptr(s); r = s->rule.ptr; a = s->anchor.ptr; log = s->log; diff --git a/freebsd/sys/netpfil/pf/pf_if.c b/freebsd/sys/netpfil/pf/pf_if.c index 2c321118..ed69acad 100644 --- a/freebsd/sys/netpfil/pf/pf_if.c +++ b/freebsd/sys/netpfil/pf/pf_if.c @@ -167,8 +167,10 @@ pfi_cleanup_vnet(void) RB_REMOVE(pfi_ifhead, &V_pfi_ifs, kif); if (kif->pfik_group) kif->pfik_group->ifg_pf_kif = NULL; - if (kif->pfik_ifp) + if (kif->pfik_ifp) { + if_rele(kif->pfik_ifp); kif->pfik_ifp->if_pf_kif = NULL; + } free(kif, PFI_MTYPE); } @@ -324,6 +326,8 @@ pfi_attach_ifnet(struct ifnet *ifp) V_pfi_update++; kif = pfi_kif_attach(kif, ifp->if_xname); + if_ref(ifp); + kif->pfik_ifp = ifp; ifp->if_pf_kif = kif; @@ -555,7 +559,8 @@ pfi_instance_add(struct ifnet *ifp, int net, int flags) if ((flags & PFI_AFLAG_PEER) && !(ifp->if_flags & IFF_POINTOPOINT)) continue; - if ((flags & PFI_AFLAG_NETWORK) && af == AF_INET6 && + if ((flags & (PFI_AFLAG_NETWORK | PFI_AFLAG_NOALIAS)) && + af == AF_INET6 && IN6_IS_ADDR_LINKLOCAL( &((struct sockaddr_in6 *)ia->ifa_addr)->sin6_addr)) continue; @@ -835,6 +840,9 @@ pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) { struct pfi_kif *kif = (struct pfi_kif *)ifp->if_pf_kif; + if (pfsync_detach_ifnet_ptr) + pfsync_detach_ifnet_ptr(ifp); + if (kif == NULL) return; @@ -842,10 +850,13 @@ pfi_detach_ifnet_event(void *arg __unused, struct ifnet *ifp) /* Avoid teardown race in the least expensive way. */ return; } + PF_RULES_WLOCK(); V_pfi_update++; pfi_kif_update(kif); + if_rele(kif->pfik_ifp); + kif->pfik_ifp = NULL; ifp->if_pf_kif = NULL; #ifdef ALTQ @@ -906,6 +917,9 @@ pfi_detach_group_event(void *arg __unused, struct ifg_group *ifg) static void pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) { + + KASSERT(ifp, ("ifp == NULL")); + if (ifp->if_pf_kif == NULL) return; @@ -914,7 +928,7 @@ pfi_ifaddr_event(void *arg __unused, struct ifnet *ifp) return; } PF_RULES_WLOCK(); - if (ifp && ifp->if_pf_kif) { + if (ifp->if_pf_kif) { V_pfi_update++; pfi_kif_update(ifp->if_pf_kif); } diff --git a/freebsd/sys/netpfil/pf/pf_ioctl.c b/freebsd/sys/netpfil/pf/pf_ioctl.c index 9ca15a41..3f732d62 100644 --- a/freebsd/sys/netpfil/pf/pf_ioctl.c +++ b/freebsd/sys/netpfil/pf/pf_ioctl.c @@ -214,12 +214,14 @@ struct sx pf_ioctl_lock; struct sx pf_end_lock; /* pfsync */ -pfsync_state_import_t *pfsync_state_import_ptr = NULL; -pfsync_insert_state_t *pfsync_insert_state_ptr = NULL; -pfsync_update_state_t *pfsync_update_state_ptr = NULL; -pfsync_delete_state_t *pfsync_delete_state_ptr = NULL; -pfsync_clear_states_t *pfsync_clear_states_ptr = NULL; -pfsync_defer_t *pfsync_defer_ptr = NULL; +VNET_DEFINE(pfsync_state_import_t *, pfsync_state_import_ptr); +VNET_DEFINE(pfsync_insert_state_t *, pfsync_insert_state_ptr); +VNET_DEFINE(pfsync_update_state_t *, pfsync_update_state_ptr); +VNET_DEFINE(pfsync_delete_state_t *, pfsync_delete_state_ptr); +VNET_DEFINE(pfsync_clear_states_t *, pfsync_clear_states_ptr); +VNET_DEFINE(pfsync_defer_t *, pfsync_defer_ptr); +pfsync_detach_ifnet_t *pfsync_detach_ifnet_ptr; + /* pflog */ pflog_packet_t *pflog_packet_ptr = NULL; @@ -1885,8 +1887,8 @@ relock_DIOCCLRSTATES: PF_HASHROW_UNLOCK(ih); } psk->psk_killed = killed; - if (pfsync_clear_states_ptr != NULL) - pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); + if (V_pfsync_clear_states_ptr != NULL) + V_pfsync_clear_states_ptr(V_pf_status.hostid, psk->psk_ifname); break; } @@ -1973,9 +1975,9 @@ relock_DIOCKILLSTATES: error = EINVAL; break; } - if (pfsync_state_import_ptr != NULL) { + if (V_pfsync_state_import_ptr != NULL) { PF_RULES_RLOCK(); - error = pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); + error = V_pfsync_state_import_ptr(sp, PFSYNC_SI_IOCTL); PF_RULES_RUNLOCK(); } else error = EOPNOTSUPP; diff --git a/freebsd/sys/netpfil/pf/pf_norm.c b/freebsd/sys/netpfil/pf/pf_norm.c index 0f98c669..9538e97c 100644 --- a/freebsd/sys/netpfil/pf/pf_norm.c +++ b/freebsd/sys/netpfil/pf/pf_norm.c @@ -4,7 +4,7 @@ * SPDX-License-Identifier: BSD-2-Clause * * Copyright 2001 Niels Provos <provos@citi.umich.edu> - * Copyright 2011 Alexander Bluhm <bluhm@openbsd.org> + * Copyright 2011-2018 Alexander Bluhm <bluhm@openbsd.org> * All rights reserved. * * Redistribution and use in source and binary forms, with or without @@ -89,14 +89,17 @@ struct pf_fragment { #define fr_af fr_key.frc_af #define fr_proto fr_key.frc_proto + /* pointers to queue element */ + struct pf_frent *fr_firstoff[PF_FRAG_ENTRY_POINTS]; + /* count entries between pointers */ + uint8_t fr_entries[PF_FRAG_ENTRY_POINTS]; RB_ENTRY(pf_fragment) fr_entry; TAILQ_ENTRY(pf_fragment) frag_next; uint32_t fr_timeout; uint16_t fr_maxlen; /* maximum length of single fragment */ - uint16_t fr_entries; /* Total number of pf_fragment entries */ + u_int16_t fr_holes; /* number of holes in the queue */ TAILQ_HEAD(pf_fragq, pf_frent) fr_queue; }; -#define PF_MAX_FRENT_PER_FRAGMENT 64 struct pf_fragment_tag { uint16_t ft_hdrlen; /* header length of reassembled pkt */ @@ -136,11 +139,18 @@ static void pf_remove_fragment(struct pf_fragment *); static int pf_normalize_tcpopt(struct pf_rule *, struct mbuf *, struct tcphdr *, int, sa_family_t); static struct pf_frent *pf_create_fragment(u_short *); +static int pf_frent_holes(struct pf_frent *frent); static struct pf_fragment *pf_find_fragment(struct pf_fragment_cmp *key, struct pf_frag_tree *tree); +static inline int pf_frent_index(struct pf_frent *); +static int pf_frent_insert(struct pf_fragment *, + struct pf_frent *, struct pf_frent *); +void pf_frent_remove(struct pf_fragment *, + struct pf_frent *); +struct pf_frent *pf_frent_previous(struct pf_fragment *, + struct pf_frent *); static struct pf_fragment *pf_fillup_fragment(struct pf_fragment_cmp *, struct pf_frent *, u_short *); -static int pf_isfull_fragment(struct pf_fragment *); static struct mbuf *pf_join_fragment(struct pf_fragment *); #ifdef INET static void pf_scrub_ip(struct mbuf **, uint32_t, uint8_t, uint8_t); @@ -311,6 +321,7 @@ pf_remove_fragment(struct pf_fragment *frag) { PF_FRAG_ASSERT(); + KASSERT(frag, ("frag != NULL")); RB_REMOVE(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_REMOVE(&V_pf_fragqueue, frag, frag_next); @@ -337,9 +348,201 @@ pf_create_fragment(u_short *reason) return (frent); } +/* + * Calculate the additional holes that were created in the fragment + * queue by inserting this fragment. A fragment in the middle + * creates one more hole by splitting. For each connected side, + * it loses one hole. + * Fragment entry must be in the queue when calling this function. + */ +static int +pf_frent_holes(struct pf_frent *frent) +{ + struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); + struct pf_frent *next = TAILQ_NEXT(frent, fr_next); + int holes = 1; + + if (prev == NULL) { + if (frent->fe_off == 0) + holes--; + } else { + KASSERT(frent->fe_off != 0, ("frent->fe_off != 0")); + if (frent->fe_off == prev->fe_off + prev->fe_len) + holes--; + } + if (next == NULL) { + if (!frent->fe_mff) + holes--; + } else { + KASSERT(frent->fe_mff, ("frent->fe_mff")); + if (next->fe_off == frent->fe_off + frent->fe_len) + holes--; + } + return holes; +} + +static inline int +pf_frent_index(struct pf_frent *frent) +{ + /* + * We have an array of 16 entry points to the queue. A full size + * 65535 octet IP packet can have 8192 fragments. So the queue + * traversal length is at most 512 and at most 16 entry points are + * checked. We need 128 additional bytes on a 64 bit architecture. + */ + CTASSERT(((u_int16_t)0xffff &~ 7) / (0x10000 / PF_FRAG_ENTRY_POINTS) == + 16 - 1); + CTASSERT(((u_int16_t)0xffff >> 3) / PF_FRAG_ENTRY_POINTS == 512 - 1); + + return frent->fe_off / (0x10000 / PF_FRAG_ENTRY_POINTS); +} + +static int +pf_frent_insert(struct pf_fragment *frag, struct pf_frent *frent, + struct pf_frent *prev) +{ + int index; + + CTASSERT(PF_FRAG_ENTRY_LIMIT <= 0xff); + + /* + * A packet has at most 65536 octets. With 16 entry points, each one + * spawns 4096 octets. We limit these to 64 fragments each, which + * means on average every fragment must have at least 64 octets. + */ + index = pf_frent_index(frent); + if (frag->fr_entries[index] >= PF_FRAG_ENTRY_LIMIT) + return ENOBUFS; + frag->fr_entries[index]++; + + if (prev == NULL) { + TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + } else { + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, + ("overlapping fragment")); + TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); + } + + if (frag->fr_firstoff[index] == NULL) { + KASSERT(prev == NULL || pf_frent_index(prev) < index, + ("prev == NULL || pf_frent_index(pref) < index")); + frag->fr_firstoff[index] = frent; + } else { + if (frent->fe_off < frag->fr_firstoff[index]->fe_off) { + KASSERT(prev == NULL || pf_frent_index(prev) < index, + ("prev == NULL || pf_frent_index(pref) < index")); + frag->fr_firstoff[index] = frent; + } else { + KASSERT(prev != NULL, ("prev != NULL")); + KASSERT(pf_frent_index(prev) == index, + ("pf_frent_index(prev) == index")); + } + } + + frag->fr_holes += pf_frent_holes(frent); + + return 0; +} + +void +pf_frent_remove(struct pf_fragment *frag, struct pf_frent *frent) +{ +#ifdef INVARIANTS + struct pf_frent *prev = TAILQ_PREV(frent, pf_fragq, fr_next); +#endif + struct pf_frent *next = TAILQ_NEXT(frent, fr_next); + int index; + + frag->fr_holes -= pf_frent_holes(frent); + + index = pf_frent_index(frent); + KASSERT(frag->fr_firstoff[index] != NULL, ("frent not found")); + if (frag->fr_firstoff[index]->fe_off == frent->fe_off) { + if (next == NULL) { + frag->fr_firstoff[index] = NULL; + } else { + KASSERT(frent->fe_off + frent->fe_len <= next->fe_off, + ("overlapping fragment")); + if (pf_frent_index(next) == index) { + frag->fr_firstoff[index] = next; + } else { + frag->fr_firstoff[index] = NULL; + } + } + } else { + KASSERT(frag->fr_firstoff[index]->fe_off < frent->fe_off, + ("frag->fr_firstoff[index]->fe_off < frent->fe_off")); + KASSERT(prev != NULL, ("prev != NULL")); + KASSERT(prev->fe_off + prev->fe_len <= frent->fe_off, + ("overlapping fragment")); + KASSERT(pf_frent_index(prev) == index, + ("pf_frent_index(prev) == index")); + } + + TAILQ_REMOVE(&frag->fr_queue, frent, fr_next); + + KASSERT(frag->fr_entries[index] > 0, ("No fragments remaining")); + frag->fr_entries[index]--; +} + +struct pf_frent * +pf_frent_previous(struct pf_fragment *frag, struct pf_frent *frent) +{ + struct pf_frent *prev, *next; + int index; + + /* + * If there are no fragments after frag, take the final one. Assume + * that the global queue is not empty. + */ + prev = TAILQ_LAST(&frag->fr_queue, pf_fragq); + KASSERT(prev != NULL, ("prev != NULL")); + if (prev->fe_off <= frent->fe_off) + return prev; + /* + * We want to find a fragment entry that is before frag, but still + * close to it. Find the first fragment entry that is in the same + * entry point or in the first entry point after that. As we have + * already checked that there are entries behind frag, this will + * succeed. + */ + for (index = pf_frent_index(frent); index < PF_FRAG_ENTRY_POINTS; + index++) { + prev = frag->fr_firstoff[index]; + if (prev != NULL) + break; + } + KASSERT(prev != NULL, ("prev != NULL")); + /* + * In prev we may have a fragment from the same entry point that is + * before frent, or one that is just one position behind frent. + * In the latter case, we go back one step and have the predecessor. + * There may be none if the new fragment will be the first one. + */ + if (prev->fe_off > frent->fe_off) { + prev = TAILQ_PREV(prev, pf_fragq, fr_next); + if (prev == NULL) + return NULL; + KASSERT(prev->fe_off <= frent->fe_off, + ("prev->fe_off <= frent->fe_off")); + return prev; + } + /* + * In prev is the first fragment of the entry point. The offset + * of frag is behind it. Find the closest previous fragment. + */ + for (next = TAILQ_NEXT(prev, fr_next); next != NULL; + next = TAILQ_NEXT(next, fr_next)) { + if (next->fe_off > frent->fe_off) + break; + prev = next; + } + return prev; +} + static struct pf_fragment * pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, - u_short *reason) + u_short *reason) { struct pf_frent *after, *next, *prev; struct pf_fragment *frag; @@ -386,23 +589,22 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, } *(struct pf_fragment_cmp *)frag = *key; + memset(frag->fr_firstoff, 0, sizeof(frag->fr_firstoff)); + memset(frag->fr_entries, 0, sizeof(frag->fr_entries)); frag->fr_timeout = time_uptime; frag->fr_maxlen = frent->fe_len; - frag->fr_entries = 0; + frag->fr_holes = 1; TAILQ_INIT(&frag->fr_queue); RB_INSERT(pf_frag_tree, &V_pf_frag_tree, frag); TAILQ_INSERT_HEAD(&V_pf_fragqueue, frag, frag_next); - /* We do not have a previous fragment. */ - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); + /* We do not have a previous fragment, cannot fail. */ + pf_frent_insert(frag, frent, NULL); return (frag); } - if (frag->fr_entries >= PF_MAX_FRENT_PER_FRAGMENT) - goto bad_fragment; - KASSERT(!TAILQ_EMPTY(&frag->fr_queue), ("!TAILQ_EMPTY()->fr_queue")); /* Remember maximum fragment len for refragmentation. */ @@ -427,17 +629,15 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, goto bad_fragment; } - /* Find a fragment after the current one. */ - prev = NULL; - TAILQ_FOREACH(after, &frag->fr_queue, fr_next) { - if (after->fe_off > frent->fe_off) - break; - prev = after; + /* Find neighbors for newly inserted fragment */ + prev = pf_frent_previous(frag, frent); + if (prev == NULL) { + after = TAILQ_FIRST(&frag->fr_queue); + KASSERT(after != NULL, ("after != NULL")); + } else { + after = TAILQ_NEXT(prev, fr_next); } - KASSERT(prev != NULL || after != NULL, - ("prev != NULL || after != NULL")); - if (prev != NULL && prev->fe_off + prev->fe_len > frent->fe_off) { uint16_t precut; @@ -465,17 +665,16 @@ pf_fillup_fragment(struct pf_fragment_cmp *key, struct pf_frent *frent, /* This fragment is completely overlapped, lose it. */ next = TAILQ_NEXT(after, fr_next); + pf_frent_remove(frag, after); m_freem(after->fe_m); - TAILQ_REMOVE(&frag->fr_queue, after, fr_next); uma_zfree(V_pf_frent_z, after); } - if (prev == NULL) - TAILQ_INSERT_HEAD(&frag->fr_queue, frent, fr_next); - else - TAILQ_INSERT_AFTER(&frag->fr_queue, prev, frent, fr_next); - - frag->fr_entries++; + /* If part of the queue gets too long, there is not way to recover. */ + if (pf_frent_insert(frag, frent, prev)) { + DPFPRINTF(("fragment queue limit exceeded")); + goto bad_fragment; + } return (frag); @@ -486,40 +685,6 @@ drop_fragment: return (NULL); } -static int -pf_isfull_fragment(struct pf_fragment *frag) -{ - struct pf_frent *frent, *next; - uint16_t off, total; - - /* Check if we are completely reassembled */ - if (TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_mff) - return (0); - - /* Maximum data we have seen already */ - total = TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_off + - TAILQ_LAST(&frag->fr_queue, pf_fragq)->fe_len; - - /* Check if we have all the data */ - off = 0; - for (frent = TAILQ_FIRST(&frag->fr_queue); frent; frent = next) { - next = TAILQ_NEXT(frent, fr_next); - - off += frent->fe_len; - if (off < total && (next == NULL || next->fe_off != off)) { - DPFPRINTF(("missing fragment at %d, next %d, total %d", - off, next == NULL ? -1 : next->fe_off, total)); - return (0); - } - } - DPFPRINTF(("%d < %d?", off, total)); - if (off < total) - return (0); - KASSERT(off == total, ("off == total")); - - return (1); -} - static struct mbuf * pf_join_fragment(struct pf_fragment *frag) { @@ -580,8 +745,10 @@ pf_reassemble(struct mbuf **m0, struct ip *ip, int dir, u_short *reason) /* The mbuf is part of the fragment entry, no direct free or access */ m = *m0 = NULL; - if (!pf_isfull_fragment(frag)) + if (frag->fr_holes) { + DPFPRINTF(("frag %d, holes %d", frag->fr_id, frag->fr_holes)); return (PF_PASS); /* drop because *m0 is NULL, no error */ + } /* We have all the data */ frent = TAILQ_FIRST(&frag->fr_queue); @@ -664,7 +831,8 @@ pf_reassemble6(struct mbuf **m0, struct ip6_hdr *ip6, struct ip6_frag *fraghdr, /* The mbuf is part of the fragment entry, no direct free or access. */ m = *m0 = NULL; - if (!pf_isfull_fragment(frag)) { + if (frag->fr_holes) { + DPFPRINTF(("frag %d, holes %d", frag->fr_id, frag->fr_holes)); PF_FRAG_UNLOCK(); return (PF_PASS); /* Drop because *m0 is NULL, no error. */ } diff --git a/freebsd/sys/netpfil/pf/pf_table.c b/freebsd/sys/netpfil/pf/pf_table.c index 1fadd38c..3f15fb0e 100644 --- a/freebsd/sys/netpfil/pf/pf_table.c +++ b/freebsd/sys/netpfil/pf/pf_table.c @@ -1756,6 +1756,7 @@ pfr_setflags_ktable(struct pfr_ktable *kt, int newf) PF_RULES_WASSERT(); if (!(newf & PFR_TFLAG_REFERENCED) && + !(newf & PFR_TFLAG_REFDANCHOR) && !(newf & PFR_TFLAG_PERSIST)) newf &= ~PFR_TFLAG_ACTIVE; if (!(newf & PFR_TFLAG_ACTIVE)) diff --git a/freebsd/sys/opencrypto/xform_auth.h b/freebsd/sys/opencrypto/xform_auth.h index 06183868..9af0f8e6 100644 --- a/freebsd/sys/opencrypto/xform_auth.h +++ b/freebsd/sys/opencrypto/xform_auth.h @@ -36,6 +36,7 @@ #include <sys/md5.h> #include <crypto/sha1.h> +#include <crypto/sha2/sha224.h> #include <crypto/sha2/sha256.h> #include <crypto/sha2/sha384.h> #include <crypto/sha2/sha512.h> @@ -89,6 +90,7 @@ union authctx { MD5_CTX md5ctx; SHA1_CTX sha1ctx; RMD160_CTX rmd160ctx; + SHA224_CTX sha224ctx; SHA256_CTX sha256ctx; SHA384_CTX sha384ctx; SHA512_CTX sha512ctx; diff --git a/freebsd/sys/security/audit/audit.h b/freebsd/sys/security/audit/audit.h index f24bc1e5..2e47b8c4 100644 --- a/freebsd/sys/security/audit/audit.h +++ b/freebsd/sys/security/audit/audit.h @@ -122,7 +122,7 @@ void audit_arg_upath2(struct thread *td, int dirfd, char *upath); void audit_arg_upath2_canon(char *upath); void audit_arg_vnode1(struct vnode *vp); void audit_arg_vnode2(struct vnode *vp); -void audit_arg_text(char *text); +void audit_arg_text(const char *text); void audit_arg_cmd(int cmd); void audit_arg_svipc_cmd(int cmd); void audit_arg_svipc_perm(struct ipc_perm *perm); diff --git a/freebsd/sys/sys/_domainset.h b/freebsd/sys/sys/_domainset.h index 34d8f61c..5685d532 100644 --- a/freebsd/sys/sys/_domainset.h +++ b/freebsd/sys/sys/_domainset.h @@ -54,7 +54,7 @@ typedef struct _domainset domainset_t; struct domainset; struct domainset_ref { struct domainset * volatile dr_policy; - unsigned int dr_iterator; + unsigned int dr_iter; }; #endif /* !_SYS__DOMAINSET_H_ */ diff --git a/freebsd/sys/sys/blist.h b/freebsd/sys/sys/blist.h index 22d834f7..595d3477 100644 --- a/freebsd/sys/sys/blist.h +++ b/freebsd/sys/sys/blist.h @@ -73,22 +73,20 @@ typedef uint64_t u_daddr_t; /* unsigned disk address */ */ typedef struct blmeta { - union { - daddr_t bmu_avail; /* space available under us */ - u_daddr_t bmu_bitmap; /* bitmap if we are a leaf */ - } u; + u_daddr_t bm_bitmap; /* bitmap if we are a leaf */ daddr_t bm_bighint; /* biggest contiguous block hint*/ } blmeta_t; typedef struct blist { daddr_t bl_blocks; /* area of coverage */ + daddr_t bl_avail; /* # available blocks */ u_daddr_t bl_radix; /* coverage radix */ daddr_t bl_cursor; /* next-fit search starts at */ blmeta_t bl_root[1]; /* root of radix tree */ } *blist_t; -#define BLIST_META_RADIX 16 #define BLIST_BMAP_RADIX (sizeof(u_daddr_t)*8) +#define BLIST_META_RADIX BLIST_BMAP_RADIX #define BLIST_MAX_ALLOC BLIST_BMAP_RADIX diff --git a/freebsd/sys/sys/capability.h b/freebsd/sys/sys/capability.h index 4b07c090..39195e03 100644 --- a/freebsd/sys/sys/capability.h +++ b/freebsd/sys/sys/capability.h @@ -35,7 +35,8 @@ * Historically, the key userspace and kernel Capsicum definitions were found * in this file. However, it conflicted with POSIX.1e's capability.h, so has * been renamed capsicum.h. The file remains for backwards compatibility - * reasons as a nested include. + * reasons as a nested include. It is expected to be removed before + * FreeBSD 13. */ #ifndef _SYS_CAPABILITY_H_ #define _SYS_CAPABILITY_H_ diff --git a/freebsd/sys/sys/domainset.h b/freebsd/sys/sys/domainset.h index 32b35ac5..5a00347f 100644 --- a/freebsd/sys/sys/domainset.h +++ b/freebsd/sys/sys/domainset.h @@ -96,7 +96,8 @@ struct domainset { domainid_t ds_order[MAXMEMDOM]; /* nth domain table. */ }; -extern struct domainset domainset_prefer[MAXMEMDOM]; +extern struct domainset domainset_fixed[MAXMEMDOM], domainset_prefer[MAXMEMDOM]; +#define DOMAINSET_FIXED(domain) (&domainset_fixed[(domain)]) #define DOMAINSET_PREF(domain) (&domainset_prefer[(domain)]) extern struct domainset domainset_roundrobin; #define DOMAINSET_RR() (&domainset_roundrobin) diff --git a/freebsd/sys/sys/gtaskqueue.h b/freebsd/sys/sys/gtaskqueue.h index c06ef503..a36c770a 100644 --- a/freebsd/sys/sys/gtaskqueue.h +++ b/freebsd/sys/sys/gtaskqueue.h @@ -52,7 +52,9 @@ int gtaskqueue_cancel(struct gtaskqueue *queue, struct gtask *gtask); void gtaskqueue_drain(struct gtaskqueue *queue, struct gtask *task); void gtaskqueue_drain_all(struct gtaskqueue *queue); -int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); +void grouptask_block(struct grouptask *grouptask); +void grouptask_unblock(struct grouptask *grouptask); +int grouptaskqueue_enqueue(struct gtaskqueue *queue, struct gtask *task); void taskqgroup_attach(struct taskqgroup *qgroup, struct grouptask *grptask, void *uniq, int irq, const char *name); int taskqgroup_attach_cpu(struct taskqgroup *qgroup, struct grouptask *grptask, @@ -67,6 +69,7 @@ void taskqgroup_config_gtask_deinit(struct grouptask *gtask); #define TASK_ENQUEUED 0x1 #define TASK_SKIP_WAKEUP 0x2 +#define TASK_NOENQUEUE 0x4 #define GTASK_INIT(task, flags, priority, func, context) do { \ diff --git a/freebsd/sys/sys/malloc.h b/freebsd/sys/sys/malloc.h index 4e7a8df3..ca81a198 100644 --- a/freebsd/sys/sys/malloc.h +++ b/freebsd/sys/sys/malloc.h @@ -166,6 +166,7 @@ MALLOC_DECLARE(M_TEMP); */ MALLOC_DECLARE(M_IOV); +struct domainset; extern struct mtx malloc_mtx; /* @@ -178,8 +179,8 @@ void *contigmalloc(unsigned long size, struct malloc_type *type, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(6); -void *contigmalloc_domain(unsigned long size, struct malloc_type *type, - int domain, int flags, vm_paddr_t low, vm_paddr_t high, +void *contigmalloc_domainset(unsigned long size, struct malloc_type *type, + struct domainset *ds, int flags, vm_paddr_t low, vm_paddr_t high, unsigned long alignment, vm_paddr_t boundary) __malloc_like __result_use_check __alloc_size(1) __alloc_align(6); void free(void *addr, struct malloc_type *type); @@ -248,8 +249,9 @@ void *_bsd_malloc(size_t size, struct malloc_type *type, int flags) _malloc_item; \ }) -void *malloc_domain(size_t size, struct malloc_type *type, int domain, - int flags) __malloc_like __result_use_check __alloc_size(1); +void *malloc_domainset(size_t size, struct malloc_type *type, + struct domainset *ds, int flags) __malloc_like __result_use_check + __alloc_size(1); void *mallocarray(size_t nmemb, size_t size, struct malloc_type *type, int flags) __malloc_like __result_use_check __alloc_size2(1, 2); diff --git a/freebsd/sys/sys/mount.h b/freebsd/sys/sys/mount.h index c4f4bebf..ac1c215b 100644 --- a/freebsd/sys/sys/mount.h +++ b/freebsd/sys/sys/mount.h @@ -513,6 +513,7 @@ struct vfsconf { u_int vfc_version; /* ABI version number */ char vfc_name[MFSNAMELEN]; /* filesystem type name */ struct vfsops *vfc_vfsops; /* filesystem operations vector */ + struct vfsops *vfc_vfsops_sd; /* ... signal-deferred */ int vfc_typenum; /* historic filesystem type number */ int vfc_refcount; /* number mounted of this type */ int vfc_flags; /* permanent flags */ @@ -553,7 +554,8 @@ struct ovfsconf { #define VFCF_UNICODE 0x00200000 /* stores file names as Unicode */ #define VFCF_JAIL 0x00400000 /* can be mounted from within a jail */ #define VFCF_DELEGADMIN 0x00800000 /* supports delegated administration */ -#define VFCF_SBDRY 0x01000000 /* defer stop requests */ +#define VFCF_SBDRY 0x01000000 /* Stop at Boundary: defer stop requests + to kernel->user (AST) transition */ typedef uint32_t fsctlop_t; @@ -694,139 +696,96 @@ struct vfsops { vfs_statfs_t __vfs_statfs; -#define VFS_PROLOGUE(MP) do { \ - struct mount *mp__; \ - int _prev_stops; \ - \ - mp__ = (MP); \ - _prev_stops = sigdeferstop((mp__ != NULL && \ - (mp__->mnt_vfc->vfc_flags & VFCF_SBDRY) != 0) ? \ - SIGDEFERSTOP_SILENT : SIGDEFERSTOP_NOP); - -#define VFS_EPILOGUE(MP) \ - sigallowstop(_prev_stops); \ -} while (0) - #define VFS_MOUNT(MP) ({ \ int _rc; \ \ TSRAW(curthread, TS_ENTER, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_mount)(MP); \ - VFS_EPILOGUE(MP); \ TSRAW(curthread, TS_EXIT, "VFS_MOUNT", (MP)->mnt_vfc->vfc_name);\ _rc; }) #define VFS_UNMOUNT(MP, FORCE) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_unmount)(MP, FORCE); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_ROOT(MP, FLAGS, VPP) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_root)(MP, FLAGS, VPP); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_QUOTACTL(MP, C, U, A) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_quotactl)(MP, C, U, A); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_STATFS(MP, SBP) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = __vfs_statfs((MP), (SBP)); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYNC(MP, WAIT) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sync)(MP, WAIT); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_VGET(MP, INO, FLAGS, VPP) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_vget)(MP, INO, FLAGS, VPP); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_FHTOVP(MP, FIDP, FLAGS, VPP) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_fhtovp)(MP, FIDP, FLAGS, VPP); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_CHECKEXP(MP, NAM, EXFLG, CRED, NUMSEC, SEC) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_checkexp)(MP, NAM, EXFLG, CRED, NUMSEC,\ SEC); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_EXTATTRCTL(MP, C, FN, NS, N) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_extattrctl)(MP, C, FN, NS, N); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SYSCTL(MP, OP, REQ) ({ \ int _rc; \ \ - VFS_PROLOGUE(MP); \ _rc = (*(MP)->mnt_op->vfs_sysctl)(MP, OP, REQ); \ - VFS_EPILOGUE(MP); \ _rc; }) #define VFS_SUSP_CLEAN(MP) do { \ if (*(MP)->mnt_op->vfs_susp_clean != NULL) { \ - VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_susp_clean)(MP); \ - VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_RECLAIM_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_reclaim_lowervp != NULL) { \ - VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_reclaim_lowervp)((MP), (VP)); \ - VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_UNLINK_LOWERVP(MP, VP) do { \ if (*(MP)->mnt_op->vfs_unlink_lowervp != NULL) { \ - VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_unlink_lowervp)((MP), (VP)); \ - VFS_EPILOGUE(MP); \ } \ } while (0) #define VFS_PURGE(MP) do { \ if (*(MP)->mnt_op->vfs_purge != NULL) { \ - VFS_PROLOGUE(MP); \ (*(MP)->mnt_op->vfs_purge)(MP); \ - VFS_EPILOGUE(MP); \ } \ } while (0) diff --git a/freebsd/sys/sys/mouse.h b/freebsd/sys/sys/mouse.h index 3bb65d5c..e6ea68bc 100644 --- a/freebsd/sys/sys/mouse.h +++ b/freebsd/sys/sys/mouse.h @@ -190,7 +190,7 @@ typedef struct mousemode { * GlidePoint, IntelliMouse, Thinking Mouse, MouseRemote, Kidspad, * VersaPad * Bus mouse protocols: - * bus, InPort + * bus, InPort -- both of these are now obsolete and will be remvoed soon. * PS/2 mouse protocol: * PS/2 */ @@ -200,8 +200,8 @@ typedef struct mousemode { #define MOUSE_PROTO_LOGI 2 /* Logitech, 3 bytes */ #define MOUSE_PROTO_MM 3 /* MM series, 3 bytes */ #define MOUSE_PROTO_LOGIMOUSEMAN 4 /* Logitech MouseMan 3/4 bytes */ -/* 5 was bus mouse */ -/* 6 was inport mosue */ +#define MOUSE_PROTO_BUS 5 /* bus mouse -- obsolete */ +#define MOUSE_PROTO_INPORT 6 /* inport mosue -- obsolete */ #define MOUSE_PROTO_PS2 7 /* PS/2 mouse, 3 bytes */ #define MOUSE_PROTO_HITTAB 8 /* Hitachi Tablet 3 bytes */ #define MOUSE_PROTO_GLIDEPOINT 9 /* ALPS GlidePoint, 3/4 bytes */ diff --git a/freebsd/sys/sys/proc.h b/freebsd/sys/sys/proc.h index 9372b3a0..9868f9cf 100644 --- a/freebsd/sys/sys/proc.h +++ b/freebsd/sys/sys/proc.h @@ -73,6 +73,9 @@ #ifdef _KERNEL #include <machine/cpu.h> #endif +#ifdef __rtems__ +#include <sys/epoch.h> +#endif /* __rtems__ */ /* * One structure allocated per session. @@ -193,6 +196,7 @@ struct trapframe; struct turnstile; struct vm_map; struct vm_map_entry; +struct epoch_tracker; /* * XXX: Does this belong in resource.h or resourcevar.h instead? @@ -236,6 +240,7 @@ struct thread { #ifdef __rtems__ Thread_Control *td_thread; struct rtems_bsd_program_control *td_prog_ctrl; + struct epoch_tracker td_et[1]; /* (k) compat KPI spare tracker */ #endif /* __rtems__ */ #ifndef __rtems__ struct mtx *volatile td_lock; /* replaces sched lock */ @@ -395,6 +400,7 @@ struct thread { int td_lastcpu; /* (t) Last cpu we were on. */ int td_oncpu; /* (t) Which cpu we are on. */ void *td_lkpi_task; /* LinuxKPI task struct pointer */ + struct epoch_tracker *td_et; /* (k) compat KPI spare tracker */ int td_pmcpend; #endif /* __rtems__ */ }; diff --git a/freebsd/sys/sys/random.h b/freebsd/sys/sys/random.h index 2543bfec..aa6f6458 100644 --- a/freebsd/sys/sys/random.h +++ b/freebsd/sys/sys/random.h @@ -102,9 +102,6 @@ enum random_entropy_source { _Static_assert(ENTROPYSOURCE <= 32, "hardcoded assumption that values fit in a typical word-sized bitset"); -#define RANDOM_HARVEST_EVERYTHING_MASK ((1 << (RANDOM_ENVIRONMENTAL_END + 1)) - 1) -#define RANDOM_HARVEST_PURE_MASK (((1 << ENTROPYSOURCE) - 1) & (-1UL << RANDOM_PURE_START)) - #define RANDOM_LEGACY_BOOT_ENTROPY_MODULE "/boot/entropy" #define RANDOM_CACHED_BOOT_ENTROPY_MODULE "boot_entropy_cache" #define RANDOM_CACHED_SKIP_START 256 diff --git a/freebsd/sys/sys/socketvar.h b/freebsd/sys/sys/socketvar.h index 96ba4a01..af85aa05 100644 --- a/freebsd/sys/sys/socketvar.h +++ b/freebsd/sys/sys/socketvar.h @@ -380,7 +380,8 @@ struct uio; * From uipc_socket and friends */ #ifndef __rtems__ -int getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len); +int getsockaddr(struct sockaddr **namp, const struct sockaddr *uaddr, + size_t len); #endif /* __rtems__ */ int getsock_cap(struct thread *td, int fd, cap_rights_t *rightsp, struct file **fpp, u_int *fflagp, struct filecaps *havecaps); diff --git a/freebsd/sys/sys/sysctl.h b/freebsd/sys/sys/sysctl.h index 995baf64..e270a4b2 100644 --- a/freebsd/sys/sys/sysctl.h +++ b/freebsd/sys/sys/sysctl.h @@ -220,6 +220,9 @@ int sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS); int sysctl_handle_uma_zone_max(SYSCTL_HANDLER_ARGS); int sysctl_handle_uma_zone_cur(SYSCTL_HANDLER_ARGS); +int sysctl_msec_to_sbintime(SYSCTL_HANDLER_ARGS); +int sysctl_usec_to_sbintime(SYSCTL_HANDLER_ARGS); + int sysctl_dpcpu_int(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_long(SYSCTL_HANDLER_ARGS); int sysctl_dpcpu_quad(SYSCTL_HANDLER_ARGS); @@ -840,6 +843,42 @@ TAILQ_HEAD(sysctl_ctx_list, sysctl_ctx_entry); NULL); \ }) +/* OID expressing a sbintime_t as microseconds */ +#define SYSCTL_SBINTIME_USEC(parent, nbr, name, access, ptr, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ + (ptr), 0, sysctl_usec_to_sbintime, "Q", descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) +#define SYSCTL_ADD_SBINTIME_USEC(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + sbintime_t *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ + __ptr, 0, sysctl_usec_to_sbintime, "Q", __DESCR(descr), \ + NULL); \ +}) + +/* OID expressing a sbintime_t as milliseconds */ +#define SYSCTL_SBINTIME_MSEC(parent, nbr, name, access, ptr, descr) \ + SYSCTL_OID(parent, nbr, name, \ + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ + (ptr), 0, sysctl_msec_to_sbintime, "Q", descr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64) +#define SYSCTL_ADD_SBINTIME_MSEC(ctx, parent, nbr, name, access, ptr, descr) \ +({ \ + sbintime_t *__ptr = (ptr); \ + CTASSERT(((access) & CTLTYPE) == 0 || \ + ((access) & SYSCTL_CT_ASSERT_MASK) == CTLTYPE_S64); \ + sysctl_add_oid(ctx, parent, nbr, name, \ + CTLTYPE_INT | CTLFLAG_MPSAFE | CTLFLAG_RD | (access), \ + __ptr, 0, sysctl_msec_to_sbintime, "Q", __DESCR(descr), \ + NULL); \ +}) + /* * A macro to generate a read-only sysctl to indicate the presence of optional * kernel features. diff --git a/freebsd/sys/sys/sysproto.h b/freebsd/sys/sys/sysproto.h index 8317e586..a39f5cd3 100644 --- a/freebsd/sys/sys/sysproto.h +++ b/freebsd/sys/sys/sysproto.h @@ -55,9 +55,9 @@ struct write_args { char nbyte_l_[PADL_(size_t)]; size_t nbyte; char nbyte_r_[PADR_(size_t)]; }; struct open_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct close_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; @@ -69,24 +69,24 @@ struct wait4_args { char rusage_l_[PADL_(struct rusage *)]; struct rusage * rusage; char rusage_r_[PADR_(struct rusage *)]; }; struct link_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; - char link_l_[PADL_(char *)]; char * link; char link_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char link_l_[PADL_(const char *)]; const char * link; char link_r_[PADR_(const char *)]; }; struct unlink_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct chdir_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct fchdir_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; }; struct chmod_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct chown_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)]; }; @@ -97,13 +97,13 @@ struct getpid_args { register_t dummy; }; struct mount_args { - char type_l_[PADL_(char *)]; char * type; char type_r_[PADR_(char *)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char type_l_[PADL_(const char *)]; const char * type; char type_r_[PADR_(const char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char data_l_[PADL_(caddr_t)]; caddr_t data; char data_r_[PADR_(caddr_t)]; + char data_l_[PADL_(void *)]; void * data; char data_r_[PADR_(void *)]; }; struct unmount_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; struct setuid_args { @@ -138,7 +138,7 @@ struct sendmsg_args { }; struct recvfrom_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)]; + char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)]; char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; char from_l_[PADL_(struct sockaddr *)]; struct sockaddr * from; char from_r_[PADR_(struct sockaddr *)]; @@ -161,7 +161,7 @@ struct getsockname_args { }; #ifndef __rtems__ struct access_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)]; }; struct chflags_args { @@ -192,7 +192,7 @@ struct getegid_args { register_t dummy; }; struct profil_args { - char samples_l_[PADL_(caddr_t)]; caddr_t samples; char samples_r_[PADR_(caddr_t)]; + char samples_l_[PADL_(char *)]; char * samples; char samples_r_[PADR_(char *)]; char size_l_[PADL_(size_t)]; size_t size; char size_r_[PADR_(size_t)]; char offset_l_[PADL_(size_t)]; size_t offset; char offset_r_[PADR_(size_t)]; char scale_l_[PADL_(u_int)]; u_int scale; char scale_r_[PADR_(u_int)]; @@ -211,10 +211,10 @@ struct getlogin_args { char namelen_l_[PADL_(u_int)]; u_int namelen; char namelen_r_[PADR_(u_int)]; }; struct setlogin_args { - char namebuf_l_[PADL_(char *)]; char * namebuf; char namebuf_r_[PADR_(char *)]; + char namebuf_l_[PADL_(const char *)]; const char * namebuf; char namebuf_r_[PADR_(const char *)]; }; struct acct_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct osigpending_args { register_t dummy; @@ -226,33 +226,33 @@ struct sigaltstack_args { struct ioctl_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char com_l_[PADL_(u_long)]; u_long com; char com_r_[PADR_(u_long)]; - char data_l_[PADL_(caddr_t)]; caddr_t data; char data_r_[PADR_(caddr_t)]; + char data_l_[PADL_(char *)]; char * data; char data_r_[PADR_(char *)]; }; struct reboot_args { char opt_l_[PADL_(int)]; int opt; char opt_r_[PADR_(int)]; }; struct revoke_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct symlink_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; - char link_l_[PADL_(char *)]; char * link; char link_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char link_l_[PADL_(const char *)]; const char * link; char link_r_[PADR_(const char *)]; }; struct readlink_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)]; char count_l_[PADL_(size_t)]; size_t count; char count_r_[PADR_(size_t)]; }; struct execve_args { - char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)]; + char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)]; char argv_l_[PADL_(char **)]; char ** argv; char argv_r_[PADR_(char **)]; char envv_l_[PADL_(char **)]; char ** envv; char envv_r_[PADR_(char **)]; }; struct umask_args { - char newmask_l_[PADL_(int)]; int newmask; char newmask_r_[PADR_(int)]; + char newmask_l_[PADL_(mode_t)]; mode_t newmask; char newmask_r_[PADR_(mode_t)]; }; struct chroot_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct getpagesize_args { register_t dummy; @@ -314,7 +314,7 @@ struct owait_args { register_t dummy; }; struct swapon_args { - char name_l_[PADL_(char *)]; char * name; char name_r_[PADR_(char *)]; + char name_l_[PADL_(const char *)]; const char * name; char name_r_[PADR_(const char *)]; }; struct getitimer_args { char which_l_[PADL_(u_int)]; u_int which; char which_r_[PADR_(u_int)]; @@ -355,7 +355,7 @@ struct socket_args { }; struct connect_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)]; + char name_l_[PADL_(const struct sockaddr *)]; const struct sockaddr * name; char name_r_[PADR_(const struct sockaddr *)]; #ifndef __rtems__ char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)]; #else /* __rtems__ */ @@ -370,7 +370,7 @@ struct getpriority_args { #endif /* __rtems__ */ struct bind_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)]; + char name_l_[PADL_(const struct sockaddr *)]; const struct sockaddr * name; char name_r_[PADR_(const struct sockaddr *)]; #ifndef __rtems__ char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)]; #else /* __rtems__ */ @@ -381,7 +381,7 @@ struct setsockopt_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)]; char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; - char val_l_[PADL_(caddr_t)]; caddr_t val; char val_r_[PADR_(caddr_t)]; + char val_l_[PADL_(const void *)]; const void * val; char val_r_[PADR_(const void *)]; #ifndef __rtems__ char valsize_l_[PADL_(int)]; int valsize; char valsize_r_[PADR_(int)]; #else /* __rtems__ */ @@ -406,7 +406,7 @@ struct getsockopt_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; char level_l_[PADL_(int)]; int level; char level_r_[PADR_(int)]; char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; - char val_l_[PADL_(caddr_t)]; caddr_t val; char val_r_[PADR_(caddr_t)]; + char val_l_[PADL_(void *)]; void * val; char val_r_[PADR_(void *)]; #ifndef __rtems__ char avalsize_l_[PADL_(int *)]; int * avalsize; char avalsize_r_[PADR_(int *)]; #else /* __rtems__ */ @@ -435,7 +435,7 @@ struct fchown_args { }; struct fchmod_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct setreuid_args { char ruid_l_[PADL_(int)]; int ruid; char ruid_r_[PADR_(int)]; @@ -446,24 +446,24 @@ struct setregid_args { char egid_l_[PADL_(int)]; int egid; char egid_r_[PADR_(int)]; }; struct rename_args { - char from_l_[PADL_(char *)]; char * from; char from_r_[PADR_(char *)]; - char to_l_[PADL_(char *)]; char * to; char to_r_[PADR_(char *)]; + char from_l_[PADL_(const char *)]; const char * from; char from_r_[PADR_(const char *)]; + char to_l_[PADL_(const char *)]; const char * to; char to_r_[PADR_(const char *)]; }; struct flock_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char how_l_[PADL_(int)]; int how; char how_r_[PADR_(int)]; }; struct mkfifo_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; #endif /* __rtems__ */ struct sendto_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)]; + char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)]; char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; - char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)]; + char to_l_[PADL_(const struct sockaddr *)]; const struct sockaddr * to; char to_r_[PADR_(const struct sockaddr *)]; #ifndef __rtems__ char tolen_l_[PADL_(int)]; int tolen; char tolen_r_[PADR_(int)]; #else /* __rtems__ */ @@ -482,14 +482,14 @@ struct socketpair_args { }; #ifndef __rtems__ struct mkdir_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; - char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; + char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct rmdir_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct utimes_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char tptr_l_[PADL_(struct timeval *)]; struct timeval * tptr; char tptr_r_[PADR_(struct timeval *)]; }; struct adjtime_args { @@ -503,10 +503,10 @@ struct setsid_args { register_t dummy; }; struct quotactl_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char cmd_l_[PADL_(int)]; int cmd; char cmd_r_[PADR_(int)]; char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; - char arg_l_[PADL_(caddr_t)]; caddr_t arg; char arg_r_[PADR_(caddr_t)]; + char arg_l_[PADL_(void *)]; void * arg; char arg_r_[PADR_(void *)]; }; struct oquota_args { register_t dummy; @@ -519,14 +519,14 @@ struct nlm_syscall_args { }; struct nfssvc_args { char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; - char argp_l_[PADL_(caddr_t)]; caddr_t argp; char argp_r_[PADR_(caddr_t)]; + char argp_l_[PADL_(void *)]; void * argp; char argp_r_[PADR_(void *)]; }; struct lgetfh_args { - char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)]; + char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)]; char fhp_l_[PADL_(struct fhandle *)]; struct fhandle * fhp; char fhp_r_[PADR_(struct fhandle *)]; }; struct getfh_args { - char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)]; + char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)]; char fhp_l_[PADL_(struct fhandle *)]; struct fhandle * fhp; char fhp_r_[PADR_(struct fhandle *)]; }; struct sysarch_args { @@ -577,7 +577,7 @@ struct seteuid_args { char euid_l_[PADL_(uid_t)]; uid_t euid; char euid_r_[PADR_(uid_t)]; }; struct pathconf_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; }; struct fpathconf_args { @@ -609,7 +609,7 @@ struct munlock_args { char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; }; struct undelete_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct futimes_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; @@ -737,7 +737,7 @@ struct issetugid_args { register_t dummy; }; struct lchown_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char uid_l_[PADL_(int)]; int uid; char uid_r_[PADR_(int)]; char gid_l_[PADL_(int)]; int gid; char gid_r_[PADR_(int)]; }; @@ -754,11 +754,11 @@ struct lio_listio_args { char sig_l_[PADL_(struct sigevent *)]; struct sigevent * sig; char sig_r_[PADR_(struct sigevent *)]; }; struct lchmod_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct lutimes_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char tptr_l_[PADL_(struct timeval *)]; struct timeval * tptr; char tptr_r_[PADR_(struct timeval *)]; }; struct preadv_args { @@ -1024,7 +1024,7 @@ struct __setugid_args { char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct eaccess_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)]; }; struct afs3_syscall_args { @@ -1156,7 +1156,7 @@ struct extattr_delete_link_args { char attrname_l_[PADL_(const char *)]; const char * attrname; char attrname_r_[PADR_(const char *)]; }; struct __mac_execve_args { - char fname_l_[PADL_(char *)]; char * fname; char fname_r_[PADR_(char *)]; + char fname_l_[PADL_(const char *)]; const char * fname; char fname_r_[PADR_(const char *)]; char argv_l_[PADL_(char **)]; char ** argv; char argv_r_[PADR_(char **)]; char envv_l_[PADL_(char **)]; char ** envv; char envv_r_[PADR_(char **)]; char mac_p_l_[PADL_(struct mac *)]; struct mac * mac_p; char mac_p_r_[PADR_(struct mac *)]; @@ -1285,7 +1285,7 @@ struct setaudit_addr_args { char length_l_[PADL_(u_int)]; u_int length; char length_r_[PADR_(u_int)]; }; struct auditctl_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct _umtx_op_args { char obj_l_[PADL_(void *)]; void * obj; char obj_r_[PADR_(void *)]; @@ -1359,9 +1359,9 @@ struct sctp_peeloff_args { }; struct sctp_generic_sendmsg_args { char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)]; - char msg_l_[PADL_(caddr_t)]; caddr_t msg; char msg_r_[PADR_(caddr_t)]; + char msg_l_[PADL_(void *)]; void * msg; char msg_r_[PADR_(void *)]; char mlen_l_[PADL_(int)]; int mlen; char mlen_r_[PADR_(int)]; - char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)]; + char to_l_[PADL_(struct sockaddr *)]; struct sockaddr * to; char to_r_[PADR_(struct sockaddr *)]; char tolen_l_[PADL_(__socklen_t)]; __socklen_t tolen; char tolen_r_[PADR_(__socklen_t)]; char sinfo_l_[PADL_(struct sctp_sndrcvinfo *)]; struct sctp_sndrcvinfo * sinfo; char sinfo_r_[PADR_(struct sctp_sndrcvinfo *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; @@ -1370,7 +1370,7 @@ struct sctp_generic_sendmsg_iov_args { char sd_l_[PADL_(int)]; int sd; char sd_r_[PADR_(int)]; char iov_l_[PADL_(struct iovec *)]; struct iovec * iov; char iov_r_[PADR_(struct iovec *)]; char iovlen_l_[PADL_(int)]; int iovlen; char iovlen_r_[PADR_(int)]; - char to_l_[PADL_(caddr_t)]; caddr_t to; char to_r_[PADR_(caddr_t)]; + char to_l_[PADL_(struct sockaddr *)]; struct sockaddr * to; char to_r_[PADR_(struct sockaddr *)]; char tolen_l_[PADL_(__socklen_t)]; __socklen_t tolen; char tolen_r_[PADR_(__socklen_t)]; char sinfo_l_[PADL_(struct sctp_sndrcvinfo *)]; struct sctp_sndrcvinfo * sinfo; char sinfo_r_[PADR_(struct sctp_sndrcvinfo *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; @@ -1397,7 +1397,7 @@ struct pwrite_args { char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; }; struct mmap_args { - char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)]; + char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)]; char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; @@ -1410,7 +1410,7 @@ struct lseek_args { char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; }; struct truncate_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; }; struct ftruncate_args { @@ -1460,19 +1460,19 @@ struct cpuset_setaffinity_args { }; struct faccessat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char amode_l_[PADL_(int)]; int amode; char amode_r_[PADR_(int)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct fchmodat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct fchownat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char uid_l_[PADL_(uid_t)]; uid_t uid; char uid_r_[PADR_(uid_t)]; char gid_l_[PADL_(gid_t)]; gid_t gid; char gid_r_[PADR_(gid_t)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; @@ -1484,59 +1484,59 @@ struct fexecve_args { }; struct futimesat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char times_l_[PADL_(struct timeval *)]; struct timeval * times; char times_r_[PADR_(struct timeval *)]; }; struct linkat_args { char fd1_l_[PADL_(int)]; int fd1; char fd1_r_[PADR_(int)]; - char path1_l_[PADL_(char *)]; char * path1; char path1_r_[PADR_(char *)]; + char path1_l_[PADL_(const char *)]; const char * path1; char path1_r_[PADR_(const char *)]; char fd2_l_[PADL_(int)]; int fd2; char fd2_r_[PADR_(int)]; - char path2_l_[PADL_(char *)]; char * path2; char path2_r_[PADR_(char *)]; + char path2_l_[PADL_(const char *)]; const char * path2; char path2_r_[PADR_(const char *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct mkdirat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct mkfifoat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct openat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; }; struct readlinkat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(char *)]; char * buf; char buf_r_[PADR_(char *)]; char bufsize_l_[PADL_(size_t)]; size_t bufsize; char bufsize_r_[PADR_(size_t)]; }; struct renameat_args { char oldfd_l_[PADL_(int)]; int oldfd; char oldfd_r_[PADR_(int)]; - char old_l_[PADL_(char *)]; char * old; char old_r_[PADR_(char *)]; + char old_l_[PADL_(const char *)]; const char * old; char old_r_[PADR_(const char *)]; char newfd_l_[PADL_(int)]; int newfd; char newfd_r_[PADR_(int)]; - char new_l_[PADL_(char *)]; char * new; char new_r_[PADR_(char *)]; + char new_l_[PADL_(const char *)]; const char * new; char new_r_[PADR_(const char *)]; }; struct symlinkat_args { - char path1_l_[PADL_(char *)]; char * path1; char path1_r_[PADR_(char *)]; + char path1_l_[PADL_(const char *)]; const char * path1; char path1_r_[PADR_(const char *)]; char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path2_l_[PADL_(char *)]; char * path2; char path2_r_[PADR_(char *)]; + char path2_l_[PADL_(const char *)]; const char * path2; char path2_r_[PADR_(const char *)]; }; struct unlinkat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct posix_openpt_args { char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; struct gssd_syscall_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; }; struct jail_get_args { char iovp_l_[PADL_(struct iovec *)]; struct iovec * iovp; char iovp_r_[PADR_(struct iovec *)]; @@ -1571,7 +1571,7 @@ struct shmctl_args { char buf_l_[PADL_(struct shmid_ds *)]; struct shmid_ds * buf; char buf_r_[PADR_(struct shmid_ds *)]; }; struct lpathconf_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char name_l_[PADL_(int)]; int name; char name_r_[PADR_(int)]; }; struct __cap_rights_get_args { @@ -1686,13 +1686,13 @@ struct cap_fcntls_get_args { struct bindat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)]; + char name_l_[PADL_(const struct sockaddr *)]; const struct sockaddr * name; char name_r_[PADR_(const struct sockaddr *)]; char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)]; }; struct connectat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char name_l_[PADL_(caddr_t)]; caddr_t name; char name_r_[PADR_(caddr_t)]; + char name_l_[PADL_(const struct sockaddr *)]; const struct sockaddr * name; char name_r_[PADR_(const struct sockaddr *)]; char namelen_l_[PADL_(int)]; int namelen; char namelen_r_[PADR_(int)]; }; struct chflagsat_args { @@ -1732,7 +1732,7 @@ struct futimens_args { }; struct utimensat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char times_l_[PADL_(struct timespec *)]; struct timespec * times; char times_r_[PADR_(struct timespec *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; @@ -1745,7 +1745,7 @@ struct fstat_args { }; struct fstatat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(struct stat *)]; struct stat * buf; char buf_r_[PADR_(struct stat *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; @@ -1760,7 +1760,7 @@ struct getdirentries_args { char basep_l_[PADL_(off_t *)]; off_t * basep; char basep_r_[PADR_(off_t *)]; }; struct statfs_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(struct statfs *)]; struct statfs * buf; char buf_r_[PADR_(struct statfs *)]; }; struct fstatfs_args { @@ -1778,7 +1778,7 @@ struct fhstatfs_args { }; struct mknodat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; char dev_l_[PADL_(dev_t)]; dev_t dev; char dev_r_[PADR_(dev_t)]; }; @@ -2201,7 +2201,7 @@ int sys_getrandom(struct thread *, struct getrandom_args *); #ifdef COMPAT_43 struct ocreat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; }; struct olseek_args { @@ -2210,11 +2210,11 @@ struct olseek_args { char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; }; struct ostat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct ostat *)]; struct ostat * ub; char ub_r_[PADR_(struct ostat *)]; }; struct olstat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct ostat *)]; struct ostat * ub; char ub_r_[PADR_(struct ostat *)]; }; struct osigaction_args { @@ -2254,13 +2254,13 @@ struct sethostname_args { }; struct osend_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)]; + char buf_l_[PADL_(const void *)]; const void * buf; char buf_r_[PADR_(const void *)]; char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; struct orecv_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char buf_l_[PADL_(caddr_t)]; caddr_t buf; char buf_r_[PADR_(caddr_t)]; + char buf_l_[PADL_(void *)]; void * buf; char buf_r_[PADR_(void *)]; char len_l_[PADL_(int)]; int len; char len_r_[PADR_(int)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; @@ -2292,11 +2292,11 @@ struct orecvmsg_args { }; struct osendmsg_args { char s_l_[PADL_(int)]; int s; char s_r_[PADR_(int)]; - char msg_l_[PADL_(caddr_t)]; caddr_t msg; char msg_r_[PADR_(caddr_t)]; + char msg_l_[PADL_(const void *)]; const void * msg; char msg_r_[PADR_(const void *)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; }; struct otruncate_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char length_l_[PADL_(long)]; long length; char length_r_[PADR_(long)]; }; struct oftruncate_args { @@ -2305,7 +2305,7 @@ struct oftruncate_args { }; struct ogetpeername_args { char fdes_l_[PADL_(int)]; int fdes; char fdes_r_[PADR_(int)]; - char asa_l_[PADL_(caddr_t)]; caddr_t asa; char asa_r_[PADR_(caddr_t)]; + char asa_l_[PADL_(struct sockaddr *)]; struct sockaddr * asa; char asa_r_[PADR_(struct sockaddr *)]; char alen_l_[PADL_(int *)]; int * alen; char alen_r_[PADR_(int *)]; }; struct osethostid_args { @@ -2378,7 +2378,7 @@ struct freebsd4_getfsstat_args { char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; }; struct freebsd4_statfs_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(struct ostatfs *)]; struct ostatfs * buf; char buf_r_[PADR_(struct ostatfs *)]; }; struct freebsd4_fstatfs_args { @@ -2448,7 +2448,7 @@ struct freebsd6_pwrite_args { char offset_l_[PADL_(off_t)]; off_t offset; char offset_r_[PADR_(off_t)]; }; struct freebsd6_mmap_args { - char addr_l_[PADL_(caddr_t)]; caddr_t addr; char addr_r_[PADR_(caddr_t)]; + char addr_l_[PADL_(void *)]; void * addr; char addr_r_[PADR_(void *)]; char len_l_[PADL_(size_t)]; size_t len; char len_r_[PADR_(size_t)]; char prot_l_[PADL_(int)]; int prot; char prot_r_[PADR_(int)]; char flags_l_[PADL_(int)]; int flags; char flags_r_[PADR_(int)]; @@ -2463,7 +2463,7 @@ struct freebsd6_lseek_args { char whence_l_[PADL_(int)]; int whence; char whence_r_[PADR_(int)]; }; struct freebsd6_truncate_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char pad_l_[PADL_(int)]; int pad; char pad_r_[PADR_(int)]; char length_l_[PADL_(off_t)]; off_t length; char length_r_[PADR_(off_t)]; }; @@ -2532,15 +2532,15 @@ int freebsd10_pipe(struct thread *, struct freebsd10_pipe_args *); #ifdef COMPAT_FREEBSD11 struct freebsd11_mknod_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; - char dev_l_[PADL_(int)]; int dev; char dev_r_[PADR_(int)]; + char dev_l_[PADL_(uint32_t)]; uint32_t dev; char dev_r_[PADR_(uint32_t)]; }; struct freebsd11_vadvise_args { char anom_l_[PADL_(int)]; int anom; char anom_r_[PADR_(int)]; }; struct freebsd11_stat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct freebsd11_stat *)]; struct freebsd11_stat * ub; char ub_r_[PADR_(struct freebsd11_stat *)]; }; struct freebsd11_fstat_args { @@ -2548,7 +2548,7 @@ struct freebsd11_fstat_args { char sb_l_[PADL_(struct freebsd11_stat *)]; struct freebsd11_stat * sb; char sb_r_[PADR_(struct freebsd11_stat *)]; }; struct freebsd11_lstat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct freebsd11_stat *)]; struct freebsd11_stat * ub; char ub_r_[PADR_(struct freebsd11_stat *)]; }; struct freebsd11_getdirentries_args { @@ -2563,7 +2563,7 @@ struct freebsd11_getdents_args { char count_l_[PADL_(size_t)]; size_t count; char count_r_[PADR_(size_t)]; }; struct freebsd11_nstat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct nstat *)]; struct nstat * ub; char ub_r_[PADR_(struct nstat *)]; }; struct freebsd11_nfstat_args { @@ -2571,7 +2571,7 @@ struct freebsd11_nfstat_args { char sb_l_[PADL_(struct nstat *)]; struct nstat * sb; char sb_r_[PADR_(struct nstat *)]; }; struct freebsd11_nlstat_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char ub_l_[PADL_(struct nstat *)]; struct nstat * ub; char ub_r_[PADR_(struct nstat *)]; }; struct freebsd11_fhstat_args { @@ -2592,7 +2592,7 @@ struct freebsd11_getfsstat_args { char mode_l_[PADL_(int)]; int mode; char mode_r_[PADR_(int)]; }; struct freebsd11_statfs_args { - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(struct freebsd11_statfs *)]; struct freebsd11_statfs * buf; char buf_r_[PADR_(struct freebsd11_statfs *)]; }; struct freebsd11_fstatfs_args { @@ -2605,13 +2605,13 @@ struct freebsd11_fhstatfs_args { }; struct freebsd11_fstatat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char buf_l_[PADL_(struct freebsd11_stat *)]; struct freebsd11_stat * buf; char buf_r_[PADR_(struct freebsd11_stat *)]; char flag_l_[PADL_(int)]; int flag; char flag_r_[PADR_(int)]; }; struct freebsd11_mknodat_args { char fd_l_[PADL_(int)]; int fd; char fd_r_[PADR_(int)]; - char path_l_[PADL_(char *)]; char * path; char path_r_[PADR_(char *)]; + char path_l_[PADL_(const char *)]; const char * path; char path_r_[PADR_(const char *)]; char mode_l_[PADL_(mode_t)]; mode_t mode; char mode_r_[PADR_(mode_t)]; char dev_l_[PADL_(uint32_t)]; uint32_t dev; char dev_r_[PADR_(uint32_t)]; }; diff --git a/freebsd/sys/sys/systm.h b/freebsd/sys/sys/systm.h index 9f9b00bd..d74a1e4b 100644 --- a/freebsd/sys/sys/systm.h +++ b/freebsd/sys/sys/systm.h @@ -673,6 +673,9 @@ void _gone_in_dev(struct device *dev, int major, const char *msg); #endif #define gone_in(major, msg) __gone_ok(major, msg) _gone_in(major, msg) #define gone_in_dev(dev, major, msg) __gone_ok(major, msg) _gone_in_dev(dev, major, msg) +#define gone_by_fcp101_dev(dev) \ + gone_in_dev((dev), 13, \ + "see https://github.com/freebsd/fcp/blob/master/fcp-0101.md") __NULLABILITY_PRAGMA_POP diff --git a/freebsd/sys/sys/tty.h b/freebsd/sys/sys/tty.h index c85ca559..8b458cf1 100644 --- a/freebsd/sys/sys/tty.h +++ b/freebsd/sys/sys/tty.h @@ -133,12 +133,8 @@ struct tty { void *t_hooksoftc; /* (t) Soft config, for hooks. */ struct cdev *t_dev; /* (c) Primary character device. */ -#ifndef PRINTF_BUFR_SIZE -#define TTY_PRINTF_SIZE 256 -#else -#define TTY_PRINTF_SIZE PRINTF_BUFR_SIZE -#endif - char t_prbuf[TTY_PRINTF_SIZE]; /* (t) */ + size_t t_prbufsz; /* (t) SIGINFO buffer size. */ + char t_prbuf[]; /* (t) SIGINFO buffer. */ }; /* diff --git a/freebsd/sys/sys/vnode.h b/freebsd/sys/sys/vnode.h index ac6f4f4e..81f90bde 100644 --- a/freebsd/sys/sys/vnode.h +++ b/freebsd/sys/sys/vnode.h @@ -485,6 +485,7 @@ typedef int vop_bypass_t(struct vop_generic_args *); struct vnodeop_desc { char *vdesc_name; /* a readable name for debugging */ int vdesc_flags; /* VDESC_* flags */ + int vdesc_vop_offset; vop_bypass_t *vdesc_call; /* Function to call */ /* @@ -786,6 +787,7 @@ void vop_rmdir_post(void *a, int rc); void vop_setattr_post(void *a, int rc); void vop_setextattr_post(void *a, int rc); void vop_symlink_post(void *a, int rc); +int vop_sigdefer(struct vop_vector *vop, struct vop_generic_args *a); #ifdef DEBUG_VFS_LOCKS void vop_strategy_pre(void *a); diff --git a/freebsd/sys/vm/uma_core.c b/freebsd/sys/vm/uma_core.c index f1ea87e8..db624654 100644 --- a/freebsd/sys/vm/uma_core.c +++ b/freebsd/sys/vm/uma_core.c @@ -61,6 +61,7 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/systm.h> #include <sys/bitset.h> +#include <sys/domainset.h> #include <sys/eventhandler.h> #include <sys/kernel.h> #include <sys/types.h> @@ -81,6 +82,7 @@ __FBSDID("$FreeBSD$"); #include <sys/vmmeter.h> #include <vm/vm.h> +#include <vm/vm_domainset.h> #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> @@ -517,6 +519,36 @@ bucket_zone_drain(void) zone_drain(ubz->ubz_zone); } +static uma_bucket_t +zone_try_fetch_bucket(uma_zone_t zone, uma_zone_domain_t zdom, const bool ws) +{ + uma_bucket_t bucket; + + ZONE_LOCK_ASSERT(zone); + + if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + MPASS(zdom->uzd_nitems >= bucket->ub_cnt); + LIST_REMOVE(bucket, ub_link); + zdom->uzd_nitems -= bucket->ub_cnt; + if (ws && zdom->uzd_imin > zdom->uzd_nitems) + zdom->uzd_imin = zdom->uzd_nitems; + } + return (bucket); +} + +static void +zone_put_bucket(uma_zone_t zone, uma_zone_domain_t zdom, uma_bucket_t bucket, + const bool ws) +{ + + ZONE_LOCK_ASSERT(zone); + + LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zdom->uzd_nitems += bucket->ub_cnt; + if (ws && zdom->uzd_imax < zdom->uzd_nitems) + zdom->uzd_imax = zdom->uzd_nitems; +} + static void zone_log_warning(uma_zone_t zone) { @@ -567,6 +599,23 @@ uma_timeout(void *unused) } /* + * Update the working set size estimate for the zone's bucket cache. + * The constants chosen here are somewhat arbitrary. With an update period of + * 20s (UMA_TIMEOUT), this estimate is dominated by zone activity over the + * last 100s. + */ +static void +zone_domain_update_wss(uma_zone_domain_t zdom) +{ + long wss; + + MPASS(zdom->uzd_imax >= zdom->uzd_imin); + wss = zdom->uzd_imax - zdom->uzd_imin; + zdom->uzd_imax = zdom->uzd_imin = zdom->uzd_nitems; + zdom->uzd_wss = (3 * wss + 2 * zdom->uzd_wss) / 5; +} + +/* * Routine to perform timeout driven calculations. This expands the * hashes and does per cpu statistics aggregation. * @@ -618,8 +667,14 @@ keg_timeout(uma_keg_t keg) static void zone_timeout(uma_zone_t zone) { + int i; zone_foreach_keg(zone, &keg_timeout); + + ZONE_LOCK(zone); + for (i = 0; i < vm_ndomains; i++) + zone_domain_update_wss(&zone->uz_domain[i]); + ZONE_UNLOCK(zone); } /* @@ -833,16 +888,16 @@ cache_drain_safe_cpu(uma_zone_t zone) cache = &zone->uz_cpu[curcpu]; if (cache->uc_allocbucket) { if (cache->uc_allocbucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, - cache->uc_allocbucket, ub_link); + zone_put_bucket(zone, &zone->uz_domain[domain], + cache->uc_allocbucket, false); else b1 = cache->uc_allocbucket; cache->uc_allocbucket = NULL; } if (cache->uc_freebucket) { if (cache->uc_freebucket->ub_cnt != 0) - LIST_INSERT_HEAD(&zone->uz_domain[domain].uzd_buckets, - cache->uc_freebucket, ub_link); + zone_put_bucket(zone, &zone->uz_domain[domain], + cache->uc_freebucket, false); else b2 = cache->uc_freebucket; cache->uc_freebucket = NULL; @@ -906,8 +961,8 @@ bucket_cache_drain(uma_zone_t zone) */ for (i = 0; i < vm_ndomains; i++) { zdom = &zone->uz_domain[i]; - while ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { - LIST_REMOVE(bucket, ub_link); + while ((bucket = zone_try_fetch_bucket(zone, zdom, false)) != + NULL) { ZONE_UNLOCK(zone); bucket_drain(zone, bucket); bucket_free(zone, bucket, NULL); @@ -1057,6 +1112,8 @@ zone_drain(uma_zone_t zone) /* * Allocate a new slab for a keg. This does not insert the slab onto a list. + * If the allocation was successful, the keg lock will be held upon return, + * otherwise the keg will be left unlocked. * * Arguments: * wait Shall we wait? @@ -1078,13 +1135,12 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait) KASSERT(domain >= 0 && domain < vm_ndomains, ("keg_alloc_slab: domain %d out of range", domain)); mtx_assert(&keg->uk_lock, MA_OWNED); - slab = NULL; - mem = NULL; allocf = keg->uk_allocf; KEG_UNLOCK(keg); - size = keg->uk_ppera * PAGE_SIZE; + slab = NULL; + mem = NULL; if (keg->uk_flags & UMA_ZONE_OFFPAGE) { slab = zone_alloc_item(keg->uk_slabzone, NULL, domain, wait); if (slab == NULL) @@ -1107,6 +1163,7 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait) wait |= M_NODUMP; /* zone is passed for legacy reasons. */ + size = keg->uk_ppera * PAGE_SIZE; mem = allocf(zone, size, domain, &flags, wait); if (mem == NULL) { if (keg->uk_flags & UMA_ZONE_OFFPAGE) @@ -1145,20 +1202,18 @@ keg_alloc_slab(uma_keg_t keg, uma_zone_t zone, int domain, int wait) goto out; } } -out: KEG_LOCK(keg); CTR3(KTR_UMA, "keg_alloc_slab: allocated slab %p for %s(%p)", slab, keg->uk_name, keg); - if (slab != NULL) { - if (keg->uk_flags & UMA_ZONE_HASH) - UMA_HASH_INSERT(&keg->uk_hash, slab, mem); + if (keg->uk_flags & UMA_ZONE_HASH) + UMA_HASH_INSERT(&keg->uk_hash, slab, mem); - keg->uk_pages += keg->uk_ppera; - keg->uk_free += keg->uk_ipers; - } + keg->uk_pages += keg->uk_ppera; + keg->uk_free += keg->uk_ipers; +out: return (slab); } @@ -1239,7 +1294,7 @@ page_alloc(uma_zone_t zone, vm_size_t bytes, int domain, uint8_t *pflag, #ifndef __rtems__ *pflag = UMA_SLAB_KERNEL; - p = (void *) kmem_malloc_domain(domain, bytes, wait); + p = (void *)kmem_malloc_domainset(DOMAINSET_FIXED(domain), bytes, wait); #else /* __rtems__ */ *pflag = 0; p = rtems_bsd_page_alloc(bytes, wait); @@ -1643,13 +1698,22 @@ keg_ctor(void *mem, int size, void *udata, int flags) keg->uk_init = arg->uminit; keg->uk_fini = arg->fini; keg->uk_align = arg->align; - keg->uk_cursor = 0; keg->uk_free = 0; keg->uk_reserve = 0; keg->uk_pages = 0; keg->uk_flags = arg->flags; keg->uk_slabzone = NULL; +#ifndef __rtems__ + /* + * We use a global round-robin policy by default. Zones with + * UMA_ZONE_NUMA set will use first-touch instead, in which case the + * iterator is never run. + */ + keg->uk_dr.dr_policy = DOMAINSET_RR(); + keg->uk_dr.dr_iter = 0; +#endif /* __rtems__ */ + /* * The master zone is passed to us at keg-creation time. */ @@ -2657,11 +2721,9 @@ zalloc_start: zdom = &zone->uz_domain[0]; else zdom = &zone->uz_domain[domain]; - if ((bucket = LIST_FIRST(&zdom->uzd_buckets)) != NULL) { + if ((bucket = zone_try_fetch_bucket(zone, zdom, true)) != NULL) { KASSERT(bucket->ub_cnt != 0, ("uma_zalloc_arg: Returning an empty bucket.")); - - LIST_REMOVE(bucket, ub_link); cache->uc_allocbucket = bucket; ZONE_UNLOCK(zone); goto zalloc_start; @@ -2690,6 +2752,7 @@ zalloc_start: critical_enter(); cpu = curcpu; cache = &zone->uz_cpu[cpu]; + /* * See if we lost the race or were migrated. Cache the * initialized bucket to make this less likely or claim @@ -2703,6 +2766,7 @@ zalloc_start: if (cache->uc_allocbucket == NULL) { #endif /* __rtems__ */ cache->uc_allocbucket = bucket; + zdom->uzd_imax += bucket->ub_cnt; } else if ((zone->uz_flags & UMA_ZONE_NOBUCKETCACHE) != 0) { critical_exit(); ZONE_UNLOCK(zone); @@ -2710,7 +2774,7 @@ zalloc_start: bucket_free(zone, bucket, udata); goto zalloc_restart; } else - LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zone_put_bucket(zone, zdom, bucket, false); ZONE_UNLOCK(zone); goto zalloc_start; } @@ -2754,7 +2818,7 @@ uma_zalloc_domain(uma_zone_t zone, void *udata, int domain, int flags) * only 'domain'. */ static uma_slab_t -keg_first_slab(uma_keg_t keg, int domain, int rr) +keg_first_slab(uma_keg_t keg, int domain, bool rr) { uma_domain_t dom; uma_slab_t slab; @@ -2785,48 +2849,61 @@ keg_first_slab(uma_keg_t keg, int domain, int rr) } static uma_slab_t -keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, int flags) +keg_fetch_free_slab(uma_keg_t keg, int domain, bool rr, int flags) { + uint32_t reserve; + + mtx_assert(&keg->uk_lock, MA_OWNED); + + reserve = (flags & M_USE_RESERVE) != 0 ? 0 : keg->uk_reserve; + if (keg->uk_free <= reserve) + return (NULL); + return (keg_first_slab(keg, domain, rr)); +} + +static uma_slab_t +keg_fetch_slab(uma_keg_t keg, uma_zone_t zone, int rdomain, const int flags) +{ +#ifndef __rtems__ + struct vm_domainset_iter di; +#endif /* __rtems__ */ uma_domain_t dom; uma_slab_t slab; - int allocflags, domain, reserve, rr, start; + int aflags, domain; + bool rr; +#ifndef __rtems__ +restart: +#endif /* __rtems__ */ mtx_assert(&keg->uk_lock, MA_OWNED); - slab = NULL; - reserve = 0; - allocflags = flags; - if ((flags & M_USE_RESERVE) == 0) - reserve = keg->uk_reserve; /* - * Round-robin for non first-touch zones when there is more than one - * domain. + * Use the keg's policy if upper layers haven't already specified a + * domain (as happens with first-touch zones). + * + * To avoid races we run the iterator with the keg lock held, but that + * means that we cannot allow the vm_domainset layer to sleep. Thus, + * clear M_WAITOK and handle low memory conditions locally. */ #ifndef __rtems__ - if (vm_ndomains == 1) - rdomain = 0; rr = rdomain == UMA_ANYDOMAIN; if (rr) { - start = keg->uk_cursor; - do { - keg->uk_cursor = (keg->uk_cursor + 1) % vm_ndomains; - domain = keg->uk_cursor; - } while (VM_DOMAIN_EMPTY(domain) && domain != start); - domain = start = keg->uk_cursor; - /* Only block on the second pass. */ - if ((flags & (M_WAITOK | M_NOVM)) == M_WAITOK) - allocflags = (allocflags & ~M_WAITOK) | M_NOWAIT; - } else - domain = start = rdomain; + aflags = (flags & ~M_WAITOK) | M_NOWAIT; + vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, + &aflags); + } else { + aflags = flags; + domain = rdomain; + } #else /* __rtems__ */ - rr = 1; - domain = start = keg->uk_cursor; + rr = true; + aflags = flags; + domain = 0; #endif /* __rtems__ */ -again: - do { - if (keg->uk_free > reserve && - (slab = keg_first_slab(keg, domain, rr)) != NULL) { + for (;;) { + slab = keg_fetch_free_slab(keg, domain, rr, flags); + if (slab != NULL) { MPASS(slab->us_keg == keg); return (slab); } @@ -2854,7 +2931,7 @@ again: msleep(keg, &keg->uk_lock, PVM, "keglimit", 0); continue; } - slab = keg_alloc_slab(keg, zone, domain, allocflags); + slab = keg_alloc_slab(keg, zone, domain, aflags); /* * If we got a slab here it's safe to mark it partially used * and return. We assume that the caller is going to remove @@ -2866,19 +2943,20 @@ again: LIST_INSERT_HEAD(&dom->ud_part_slab, slab, us_link); return (slab); } + KEG_LOCK(keg); #ifndef __rtems__ - if (rr) { - do { - domain = (domain + 1) % vm_ndomains; - } while (VM_DOMAIN_EMPTY(domain) && domain != start); + if (rr && vm_domainset_iter_policy(&di, &domain) != 0) { + if ((flags & M_WAITOK) != 0) { + KEG_UNLOCK(keg); + vm_wait_doms(&keg->uk_dr.dr_policy->ds_mask); + KEG_LOCK(keg); + goto restart; + } + break; } +#else /* __rtems__ */ + return (NULL); #endif /* __rtems__ */ - } while (domain != start); - - /* Retry domain scan with blocking. */ - if (allocflags != flags) { - allocflags = flags; - goto again; } /* @@ -2886,8 +2964,7 @@ again: * could have while we were unlocked. Check again before we * fail. */ - if (keg->uk_free > reserve && - (slab = keg_first_slab(keg, domain, rr)) != NULL) { + if ((slab = keg_fetch_free_slab(keg, domain, rr, flags)) != NULL) { MPASS(slab->us_keg == keg); return (slab); } @@ -3353,7 +3430,7 @@ zfree_start: bucket_free(zone, bucket, udata); goto zfree_restart; } else - LIST_INSERT_HEAD(&zdom->uzd_buckets, bucket, ub_link); + zone_put_bucket(zone, zdom, bucket, true); } /* @@ -3775,30 +3852,30 @@ uma_zone_reserve_kva(uma_zone_t zone, int count) void uma_prealloc(uma_zone_t zone, int items) { + struct vm_domainset_iter di; uma_domain_t dom; uma_slab_t slab; uma_keg_t keg; - int domain, slabs; + int domain, flags, slabs; keg = zone_first_keg(zone); if (keg == NULL) return; KEG_LOCK(keg); slabs = items / keg->uk_ipers; - domain = 0; if (slabs * keg->uk_ipers < items) slabs++; - while (slabs > 0) { - slab = keg_alloc_slab(keg, zone, domain, M_WAITOK); + flags = M_WAITOK; + vm_domainset_iter_policy_ref_init(&di, &keg->uk_dr, &domain, &flags); + while (slabs-- > 0) { + slab = keg_alloc_slab(keg, zone, domain, flags); if (slab == NULL) - break; + return; MPASS(slab->us_keg == keg); dom = &keg->uk_domain[slab->us_domain]; LIST_INSERT_HEAD(&dom->ud_free_slab, slab, us_link); - slabs--; - do { - domain = (domain + 1) % vm_ndomains; - } while (VM_DOMAIN_EMPTY(domain)); + if (vm_domainset_iter_policy(&di, &domain) != 0) + break; } KEG_UNLOCK(keg); } @@ -3819,6 +3896,7 @@ uma_reclaim_locked(bool kmem_danger) zone_foreach(zone_drain); } #endif /* __rtems__ */ + /* * Some slabs may have been freed but this zone will be visited early * we visit again so that we can free pages that are empty once other @@ -3891,6 +3969,7 @@ uma_zone_exhausted_nolock(uma_zone_t zone) void * uma_large_malloc_domain(vm_size_t size, int domain, int wait) { + struct domainset *policy; vm_offset_t addr; uma_slab_t slab; @@ -3902,10 +3981,9 @@ uma_large_malloc_domain(vm_size_t size, int domain, int wait) slab = zone_alloc_item(slabzone, NULL, domain, wait); if (slab == NULL) return (NULL); - if (domain == UMA_ANYDOMAIN) - addr = kmem_malloc(size, wait); - else - addr = kmem_malloc_domain(domain, size, wait); + policy = (domain == UMA_ANYDOMAIN) ? DOMAINSET_RR() : + DOMAINSET_FIXED(domain); + addr = kmem_malloc_domainset(policy, size, wait); if (addr != 0) { vsetslab(addr, slab); slab->us_data = (void *)addr; @@ -4057,7 +4135,7 @@ uma_print_zone(uma_zone_t zone) * directly so that we don't have to. */ static void -uma_zone_sumstat(uma_zone_t z, int *cachefreep, uint64_t *allocsp, +uma_zone_sumstat(uma_zone_t z, long *cachefreep, uint64_t *allocsp, uint64_t *freesp, uint64_t *sleepsp) { uma_cache_t cache; @@ -4113,7 +4191,6 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) struct uma_stream_header ush; struct uma_type_header uth; struct uma_percpu_stat *ups; - uma_bucket_t bucket; uma_zone_domain_t zdom; struct sbuf sbuf; uma_cache_t cache; @@ -4173,9 +4250,7 @@ sysctl_vm_zone_stats(SYSCTL_HANDLER_ARGS) for (i = 0; i < vm_ndomains; i++) { zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, - ub_link) - uth.uth_zone_free += bucket->ub_cnt; + uth.uth_zone_free += zdom->uzd_nitems; } uth.uth_allocs = z->uz_allocs; uth.uth_frees = z->uz_frees; @@ -4376,12 +4451,11 @@ uma_dbg_free(uma_zone_t zone, uma_slab_t slab, void *item) #ifdef DDB DB_SHOW_COMMAND(uma, db_show_uma) { - uma_bucket_t bucket; uma_keg_t kz; uma_zone_t z; - uma_zone_domain_t zdom; uint64_t allocs, frees, sleeps; - int cachefree, i; + long cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Sleeps", "Bucket"); @@ -4398,13 +4472,10 @@ DB_SHOW_COMMAND(uma, db_show_uma) if (!((z->uz_flags & UMA_ZONE_SECONDARY) && (LIST_FIRST(&kz->uk_zones) != z))) cachefree += kz->uk_free; - for (i = 0; i < vm_ndomains; i++) { - zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, - ub_link) - cachefree += bucket->ub_cnt; - } - db_printf("%18s %8ju %8jd %8d %12ju %8ju %8u\n", + for (i = 0; i < vm_ndomains; i++) + cachefree += z->uz_domain[i].uzd_nitems; + + db_printf("%18s %8ju %8jd %8ld %12ju %8ju %8u\n", z->uz_name, (uintmax_t)kz->uk_size, (intmax_t)(allocs - frees), cachefree, (uintmax_t)allocs, sleeps, z->uz_count); @@ -4416,22 +4487,18 @@ DB_SHOW_COMMAND(uma, db_show_uma) DB_SHOW_COMMAND(umacache, db_show_umacache) { - uma_bucket_t bucket; uma_zone_t z; - uma_zone_domain_t zdom; uint64_t allocs, frees; - int cachefree, i; + long cachefree; + int i; db_printf("%18s %8s %8s %8s %12s %8s\n", "Zone", "Size", "Used", "Free", "Requests", "Bucket"); LIST_FOREACH(z, &uma_cachezones, uz_link) { uma_zone_sumstat(z, &cachefree, &allocs, &frees, NULL); - for (i = 0; i < vm_ndomains; i++) { - zdom = &z->uz_domain[i]; - LIST_FOREACH(bucket, &zdom->uzd_buckets, ub_link) - cachefree += bucket->ub_cnt; - } - db_printf("%18s %8ju %8jd %8d %12ju %8u\n", + for (i = 0; i < vm_ndomains; i++) + cachefree += z->uz_domain[i].uzd_nitems; + db_printf("%18s %8ju %8jd %8ld %12ju %8u\n", z->uz_name, (uintmax_t)z->uz_size, (intmax_t)(allocs - frees), cachefree, (uintmax_t)allocs, z->uz_count); diff --git a/freebsd/sys/vm/uma_int.h b/freebsd/sys/vm/uma_int.h index ff638095..0fbc0512 100644 --- a/freebsd/sys/vm/uma_int.h +++ b/freebsd/sys/vm/uma_int.h @@ -31,6 +31,7 @@ */ #include <sys/_bitset.h> +#include <sys/_domainset.h> #include <sys/_task.h> /* @@ -226,7 +227,9 @@ struct uma_keg { struct uma_hash uk_hash; LIST_HEAD(,uma_zone) uk_zones; /* Keg's zones */ - uint32_t uk_cursor; /* Domain alloc cursor. */ +#ifndef __rtems__ + struct domainset_ref uk_dr; /* Domain selection policy. */ +#endif /* __rtems__ */ uint32_t uk_align; /* Alignment mask */ uint32_t uk_pages; /* Total page count */ uint32_t uk_free; /* Count of items free in slabs */ @@ -307,6 +310,10 @@ typedef struct uma_klink *uma_klink_t; struct uma_zone_domain { LIST_HEAD(,uma_bucket) uzd_buckets; /* full buckets */ + long uzd_nitems; /* total item count */ + long uzd_imax; /* maximum item count this period */ + long uzd_imin; /* minimum item count this period */ + long uzd_wss; /* working set size estimate */ }; typedef struct uma_zone_domain * uma_zone_domain_t; @@ -430,11 +437,12 @@ void uma_large_free(uma_slab_t slab); mtx_init(&(z)->uz_lock, (z)->uz_name, \ "UMA zone", MTX_DEF | MTX_DUPOK); \ } while (0) - + #define ZONE_LOCK(z) mtx_lock((z)->uz_lockptr) #define ZONE_TRYLOCK(z) mtx_trylock((z)->uz_lockptr) #define ZONE_UNLOCK(z) mtx_unlock((z)->uz_lockptr) #define ZONE_LOCK_FINI(z) mtx_destroy(&(z)->uz_lock) +#define ZONE_LOCK_ASSERT(z) mtx_assert((z)->uz_lockptr, MA_OWNED) /* * Find a slab within a hash table. This is used for OFFPAGE zones to lookup diff --git a/freebsd/sys/vm/vm_extern.h b/freebsd/sys/vm/vm_extern.h index b2f1d726..52884357 100644 --- a/freebsd/sys/vm/vm_extern.h +++ b/freebsd/sys/vm/vm_extern.h @@ -44,6 +44,7 @@ struct vmem; #ifdef _KERNEL struct cdev; struct cdevsw; +struct domainset; /* These operate on kernel virtual addresses only. */ vm_offset_t kva_alloc(vm_size_t); @@ -56,16 +57,17 @@ void kmap_free_wakeup(vm_map_t, vm_offset_t, vm_size_t); /* These operate on virtual addresses backed by memory. */ vm_offset_t kmem_alloc_attr(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); -vm_offset_t kmem_alloc_attr_domain(int domain, vm_size_t size, int flags, - vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); +vm_offset_t kmem_alloc_attr_domainset(struct domainset *ds, vm_size_t size, + int flags, vm_paddr_t low, vm_paddr_t high, vm_memattr_t memattr); vm_offset_t kmem_alloc_contig(vm_size_t size, int flags, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); -vm_offset_t kmem_alloc_contig_domain(int domain, vm_size_t size, int flags, - vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, - vm_memattr_t memattr); +vm_offset_t kmem_alloc_contig_domainset(struct domainset *ds, vm_size_t size, + int flags, vm_paddr_t low, vm_paddr_t high, u_long alignment, + vm_paddr_t boundary, vm_memattr_t memattr); vm_offset_t kmem_malloc(vm_size_t size, int flags); -vm_offset_t kmem_malloc_domain(int domain, vm_size_t size, int flags); +vm_offset_t kmem_malloc_domainset(struct domainset *ds, vm_size_t size, + int flags); void kmem_free(vm_offset_t addr, vm_size_t size); /* This provides memory for previously allocated address space. */ |