Diffstat (limited to 'freebsd/sys/kern')
24 files changed, 3113 insertions(+), 616 deletions(-)
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 84b44410..604a7f2b 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -321,6 +321,7 @@ print_version(void *data __unused) while (len > 0 && version[len - 1] == '\n') len--; printf("%.*s %s\n", len, version, machine); + printf("%s\n", compiler_version); } SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t, @@ -394,6 +395,7 @@ struct sysentvec null_sysvec = { .sv_set_syscall_retval = null_set_syscall_retval, .sv_fetch_syscall_args = null_fetch_syscall_args, .sv_syscallnames = NULL, + .sv_schedtail = NULL, }; #endif /* __rtems__ */ @@ -478,7 +480,7 @@ proc0_init(void *dummy __unused) td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; td->td_priority = PVM; - td->td_base_pri = PUSER; + td->td_base_pri = PVM; td->td_oncpu = 0; td->td_flags = TDF_INMEM|TDP_KTHREAD; td->td_cpuset = cpuset_thread0(); diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index d318c8ba..62498b25 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -530,6 +530,10 @@ knote_fork(struct knlist *list, int pid) } #endif /* __rtems__ */ +/* + * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the + * interval timer support code. + */ static int timertoticks(intptr_t data) { @@ -543,7 +547,6 @@ timertoticks(intptr_t data) return tticks; } -/* XXX - move to kern_timeout.c? */ static void filt_timerexpire(void *knx) { @@ -553,9 +556,16 @@ filt_timerexpire(void *knx) kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ + /* + * timertoticks() uses tvtohz() which always adds 1 to allow + * for the time until the next clock interrupt being strictly + * less than 1 clock tick. We don't want that here since we + * want to appear to be in sync with the clock interrupt even + * when we're delayed. + */ if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { calloutp = (struct callout *)kn->kn_hook; - callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata), + callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1, filt_timerexpire, kn); } } @@ -563,7 +573,6 @@ filt_timerexpire(void *knx) /* * data contains amount of time to sleep, in milliseconds */ -/* XXX - move to kern_timeout.c? */ static int filt_timerattach(struct knote *kn) { @@ -587,7 +596,6 @@ filt_timerattach(struct knote *kn) return (0); } -/* XXX - move to kern_timeout.c? */ static void filt_timerdetach(struct knote *kn) { @@ -600,7 +608,6 @@ filt_timerdetach(struct knote *kn) kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */ } -/* XXX - move to kern_timeout.c? */ static int filt_timer(struct knote *kn, long hint) { @@ -1851,6 +1858,7 @@ kqueue_close(struct file *fp, struct thread *td) rtems_libio_unlock(); #endif /* __rtems__ */ + seldrain(&kq->kq_sel); knlist_destroy(&kq->kq_sel.si_note); mtx_destroy(&kq->kq_lock); #ifndef __rtems__ diff --git a/freebsd/sys/kern/kern_hhook.c b/freebsd/sys/kern/kern_hhook.c new file mode 100644 index 00000000..3a5503a6 --- /dev/null +++ b/freebsd/sys/kern/kern_hhook.c @@ -0,0 +1,456 @@ +#include <machine/rtems-bsd-config.h> + +/*- + * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. 
+ * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, + * made possible in part by grants from the FreeBSD Foundation and Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/hhook.h> +#include <sys/khelp.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/module_khelp.h> +#include <sys/osd.h> +#include <sys/queue.h> +#include <sys/refcount.h> +#include <sys/systm.h> + +#include <net/vnet.h> + +struct hhook { + hhook_func_t hhk_func; + struct helper *hhk_helper; + void *hhk_udata; + STAILQ_ENTRY(hhook) hhk_next; +}; + +static MALLOC_DEFINE(M_HHOOK, "hhook", "Helper hooks are linked off hhook_head lists"); + +LIST_HEAD(hhookheadhead, hhook_head); +VNET_DEFINE(struct hhookheadhead, hhook_head_list); +#define V_hhook_head_list VNET(hhook_head_list) + +static struct mtx hhook_head_list_lock; +MTX_SYSINIT(hhookheadlistlock, &hhook_head_list_lock, "hhook_head list lock", + MTX_DEF); + +/* Private function prototypes. */ +static void hhook_head_destroy(struct hhook_head *hhh); + +#define HHHLIST_LOCK() mtx_lock(&hhook_head_list_lock) +#define HHHLIST_UNLOCK() mtx_unlock(&hhook_head_list_lock) +#define HHHLIST_LOCK_ASSERT() mtx_assert(&hhook_head_list_lock, MA_OWNED) + +#define HHH_LOCK_INIT(hhh) rm_init(&(hhh)->hhh_lock, "hhook_head rm lock") +#define HHH_LOCK_DESTROY(hhh) rm_destroy(&(hhh)->hhh_lock) +#define HHH_WLOCK(hhh) rm_wlock(&(hhh)->hhh_lock) +#define HHH_WUNLOCK(hhh) rm_wunlock(&(hhh)->hhh_lock) +#define HHH_RLOCK(hhh, rmpt) rm_rlock(&(hhh)->hhh_lock, (rmpt)) +#define HHH_RUNLOCK(hhh, rmpt) rm_runlock(&(hhh)->hhh_lock, (rmpt)) + +/* + * Run all helper hook functions for a given hook point. 
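+ * Hook functions are invoked with the hhook_head read (rm) lock held,
+ * so they must not sleep.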
+ */ +void +hhook_run_hooks(struct hhook_head *hhh, void *ctx_data, struct osd *hosd) +{ + struct hhook *hhk; + void *hdata; + struct rm_priotracker rmpt; + + KASSERT(hhh->hhh_refcount > 0, ("hhook_head %p refcount is 0", hhh)); + + HHH_RLOCK(hhh, &rmpt); + STAILQ_FOREACH(hhk, &hhh->hhh_hooks, hhk_next) { + if (hhk->hhk_helper->h_flags & HELPER_NEEDS_OSD) { + hdata = osd_get(OSD_KHELP, hosd, hhk->hhk_helper->h_id); + if (hdata == NULL) + continue; + } else + hdata = NULL; + + /* + * XXXLAS: We currently ignore the int returned by the hook, + * but will likely want to handle it in future to allow hhook to + * be used like pfil and effect changes at the hhook calling + * site e.g. we could define a new hook type of HHOOK_TYPE_PFIL + * and standardise what particular return values mean and set + * the context data to pass exactly the same information as pfil + * hooks currently receive, thus replicating pfil with hhook. + */ + hhk->hhk_func(hhh->hhh_type, hhh->hhh_id, hhk->hhk_udata, + ctx_data, hdata, hosd); + } + HHH_RUNLOCK(hhh, &rmpt); +} + +/* + * Register a new helper hook function with a helper hook point. + */ +int +hhook_add_hook(struct hhook_head *hhh, struct hookinfo *hki, uint32_t flags) +{ + struct hhook *hhk, *tmp; + int error; + + error = 0; + + if (hhh == NULL) + return (ENOENT); + + hhk = malloc(sizeof(struct hhook), M_HHOOK, + M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT)); + + if (hhk == NULL) + return (ENOMEM); + + hhk->hhk_helper = hki->hook_helper; + hhk->hhk_func = hki->hook_func; + hhk->hhk_udata = hki->hook_udata; + + HHH_WLOCK(hhh); + STAILQ_FOREACH(tmp, &hhh->hhh_hooks, hhk_next) { + if (tmp->hhk_func == hki->hook_func && + tmp->hhk_udata == hki->hook_udata) { + /* The helper hook function is already registered. */ + error = EEXIST; + break; + } + } + + if (!error) { + STAILQ_INSERT_TAIL(&hhh->hhh_hooks, hhk, hhk_next); + hhh->hhh_nhooks++; + } else + free(hhk, M_HHOOK); + + HHH_WUNLOCK(hhh); + + return (error); +} + +/* + * Lookup a helper hook point and register a new helper hook function with it. + */ +int +hhook_add_hook_lookup(struct hookinfo *hki, uint32_t flags) +{ + struct hhook_head *hhh; + int error; + + hhh = hhook_head_get(hki->hook_type, hki->hook_id); + + if (hhh == NULL) + return (ENOENT); + + error = hhook_add_hook(hhh, hki, flags); + hhook_head_release(hhh); + + return (error); +} + +/* + * Remove a helper hook function from a helper hook point. + */ +int +hhook_remove_hook(struct hhook_head *hhh, struct hookinfo *hki) +{ + struct hhook *tmp; + + if (hhh == NULL) + return (ENOENT); + + HHH_WLOCK(hhh); + STAILQ_FOREACH(tmp, &hhh->hhh_hooks, hhk_next) { + if (tmp->hhk_func == hki->hook_func && + tmp->hhk_udata == hki->hook_udata) { + STAILQ_REMOVE(&hhh->hhh_hooks, tmp, hhook, hhk_next); + free(tmp, M_HHOOK); + hhh->hhh_nhooks--; + break; + } + } + HHH_WUNLOCK(hhh); + + return (0); +} + +/* + * Lookup a helper hook point and remove a helper hook function from it. + */ +int +hhook_remove_hook_lookup(struct hookinfo *hki) +{ + struct hhook_head *hhh; + + hhh = hhook_head_get(hki->hook_type, hki->hook_id); + + if (hhh == NULL) + return (ENOENT); + + hhook_remove_hook(hhh, hki); + hhook_head_release(hhh); + + return (0); +} + +/* + * Register a new helper hook point. + */ +int +hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hhh, + uint32_t flags) +{ + struct hhook_head *tmphhh; + + tmphhh = hhook_head_get(hhook_type, hhook_id); + + if (tmphhh != NULL) { + /* Hook point previously registered. 
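+		 * Drop the reference taken by hhook_head_get() before
+		 * reporting the conflict.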
*/ + hhook_head_release(tmphhh); + return (EEXIST); + } + + /* XXXLAS: Need to implement support for non-virtualised hooks. */ + if ((flags & HHOOK_HEADISINVNET) == 0) { + printf("%s: only vnet-style virtualised hooks can be used\n", + __func__); + return (EINVAL); + } + + tmphhh = malloc(sizeof(struct hhook_head), M_HHOOK, + M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT)); + + if (tmphhh == NULL) + return (ENOMEM); + + tmphhh->hhh_type = hhook_type; + tmphhh->hhh_id = hhook_id; + tmphhh->hhh_nhooks = 0; + STAILQ_INIT(&tmphhh->hhh_hooks); + HHH_LOCK_INIT(tmphhh); + + if (hhh != NULL) + refcount_init(&tmphhh->hhh_refcount, 1); + else + refcount_init(&tmphhh->hhh_refcount, 0); + + if (flags & HHOOK_HEADISINVNET) { + tmphhh->hhh_flags |= HHH_ISINVNET; + HHHLIST_LOCK(); + LIST_INSERT_HEAD(&V_hhook_head_list, tmphhh, hhh_next); + HHHLIST_UNLOCK(); + } else { + /* XXXLAS: Add tmphhh to the non-virtualised list. */ + } + + *hhh = tmphhh; + + return (0); +} + +static void +hhook_head_destroy(struct hhook_head *hhh) +{ + struct hhook *tmp, *tmp2; + + HHHLIST_LOCK_ASSERT(); + + LIST_REMOVE(hhh, hhh_next); + HHH_WLOCK(hhh); + STAILQ_FOREACH_SAFE(tmp, &hhh->hhh_hooks, hhk_next, tmp2) + free(tmp, M_HHOOK); + HHH_WUNLOCK(hhh); + HHH_LOCK_DESTROY(hhh); + free(hhh, M_HHOOK); +} + +/* + * Remove a helper hook point. + */ +int +hhook_head_deregister(struct hhook_head *hhh) +{ + int error; + + error = 0; + + HHHLIST_LOCK(); + if (hhh == NULL) + error = ENOENT; + else if (hhh->hhh_refcount > 1) + error = EBUSY; + else + hhook_head_destroy(hhh); + HHHLIST_UNLOCK(); + + return (error); +} + +/* + * Remove a helper hook point via a hhook_head lookup. + */ +int +hhook_head_deregister_lookup(int32_t hhook_type, int32_t hhook_id) +{ + struct hhook_head *hhh; + int error; + + hhh = hhook_head_get(hhook_type, hhook_id); + error = hhook_head_deregister(hhh); + + if (error == EBUSY) + hhook_head_release(hhh); + + return (error); +} + +/* + * Lookup and return the hhook_head struct associated with the specified type + * and id, or NULL if not found. If found, the hhook_head's refcount is bumped. + */ +struct hhook_head * +hhook_head_get(int32_t hhook_type, int32_t hhook_id) +{ + struct hhook_head *hhh; + + /* XXXLAS: Pick hhook_head_list based on hhook_head flags. */ + HHHLIST_LOCK(); + LIST_FOREACH(hhh, &V_hhook_head_list, hhh_next) { + if (hhh->hhh_type == hhook_type && hhh->hhh_id == hhook_id) { + refcount_acquire(&hhh->hhh_refcount); + break; + } + } + HHHLIST_UNLOCK(); + + return (hhh); +} + +void +hhook_head_release(struct hhook_head *hhh) +{ + + refcount_release(&hhh->hhh_refcount); +} + +/* + * Check the hhook_head private flags and return the appropriate public + * representation of the flag to the caller. The function is implemented in a + * way that allows us to cope with other subsystems becoming virtualised in the + * future. + */ +uint32_t +hhook_head_is_virtualised(struct hhook_head *hhh) +{ + uint32_t ret; + + ret = 0; + + if (hhh != NULL) { + if (hhh->hhh_flags & HHH_ISINVNET) + ret = HHOOK_HEADISINVNET; + } + + return (ret); +} + +uint32_t +hhook_head_is_virtualised_lookup(int32_t hook_type, int32_t hook_id) +{ + struct hhook_head *hhh; + uint32_t ret; + + hhh = hhook_head_get(hook_type, hook_id); + + if (hhh == NULL) + return (0); + + ret = hhook_head_is_virtualised(hhh); + hhook_head_release(hhh); + + return (ret); +} + +/* + * Vnet created and being initialised. 
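+ * Initialise this vnet's private list of helper hook points.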
+ */ +static void +hhook_vnet_init(const void *unused __unused) +{ + + LIST_INIT(&V_hhook_head_list); +} + +/* + * Vnet being torn down and destroyed. + */ +static void +hhook_vnet_uninit(const void *unused __unused) +{ + struct hhook_head *hhh, *tmphhh; + + /* + * If subsystems which export helper hook points use the hhook KPI + * correctly, the loop below should have no work to do because the + * subsystem should have already called hhook_head_deregister(). + */ + HHHLIST_LOCK(); + LIST_FOREACH_SAFE(hhh, &V_hhook_head_list, hhh_next, tmphhh) { + printf("%s: hhook_head type=%d, id=%d cleanup required\n", + __func__, hhh->hhh_type, hhh->hhh_id); + hhook_head_destroy(hhh); + } + HHHLIST_UNLOCK(); +} + + +/* + * When a vnet is created and being initialised, init the V_hhook_head_list. + */ +VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, + hhook_vnet_init, NULL); + +/* + * The hhook KPI provides a mechanism for subsystems which export helper hook + * points to clean up on vnet tear down, but in case the KPI is misused, + * provide a function to clean up and free memory for a vnet being destroyed. + */ +VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, + hhook_vnet_uninit, NULL); diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c index b23ce519..336866c8 100644 --- a/freebsd/sys/kern/kern_intr.c +++ b/freebsd/sys/kern/kern_intr.c @@ -148,22 +148,18 @@ intr_priority(enum intr_type flags) INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV); switch (flags) { case INTR_TYPE_TTY: - pri = PI_TTYLOW; + pri = PI_TTY; break; case INTR_TYPE_BIO: - /* - * XXX We need to refine this. BSD/OS distinguishes - * between tape and disk priorities. - */ pri = PI_DISK; break; case INTR_TYPE_NET: pri = PI_NET; break; case INTR_TYPE_CAM: - pri = PI_DISK; /* XXX or PI_CAM? */ + pri = PI_DISK; break; - case INTR_TYPE_AV: /* Audio/video */ + case INTR_TYPE_AV: pri = PI_AV; break; case INTR_TYPE_CLK: @@ -202,6 +198,9 @@ ithread_update(struct intr_thread *ithd) /* Update name and priority. 
*/ strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name)); +#ifdef KTR + sched_clear_tdname(td); +#endif thread_lock(td); #ifndef __rtems__ sched_prio(td, pri); @@ -1118,6 +1117,7 @@ int swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler, void *arg, int pri, enum intr_type flags, void **cookiep) { + struct thread *td; struct intr_event *ie; int error; @@ -1138,16 +1138,15 @@ swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler, *eventp = ie; } error = intr_event_add_handler(ie, name, NULL, handler, arg, - (pri * RQ_PPQ) + PI_SOFT, flags, cookiep); + PI_SWI(pri), flags, cookiep); if (error) return (error); #ifndef __rtems__ if (pri == SWI_CLOCK) { - struct proc *p; - p = ie->ie_thread->it_thread->td_proc; - PROC_LOCK(p); - p->p_flag |= P_NOLOAD; - PROC_UNLOCK(p); + td = ie->ie_thread->it_thread; + thread_lock(td); + td->td_flags |= TDF_NOLOAD; + thread_unlock(td); } #else /* __rtems__ */ // Do _not_ ignore the thread in the load avarage @@ -1742,18 +1741,13 @@ db_dump_intrhand(struct intr_handler *ih) case PI_AV: db_printf("AV "); break; - case PI_TTYHIGH: - case PI_TTYLOW: + case PI_TTY: db_printf("TTY "); break; - case PI_TAPE: - db_printf("TAPE"); - break; case PI_NET: db_printf("NET "); break; case PI_DISK: - case PI_DISKLOW: db_printf("DISK"); break; case PI_DULL: diff --git a/freebsd/sys/kern/kern_khelp.c b/freebsd/sys/kern/kern_khelp.c new file mode 100644 index 00000000..ce8dd662 --- /dev/null +++ b/freebsd/sys/kern/kern_khelp.c @@ -0,0 +1,475 @@ +#include <machine/rtems-bsd-config.h> + +/*- + * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * Copyright (c) 2010 The FreeBSD Foundation + * All rights reserved. + * + * This software was developed by Lawrence Stewart while studying at the Centre + * for Advanced Internet Architectures, Swinburne University of Technology, + * made possible in part by grants from the FreeBSD Foundation and Cisco + * University Research Program Fund at Community Foundation Silicon Valley. + * + * Portions of this software were developed at the Centre for Advanced + * Internet Architectures, Swinburne University of Technology, Melbourne, + * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/hhook.h> +#include <sys/jail.h> +#include <sys/khelp.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/malloc.h> +#include <sys/module.h> +#include <sys/module_khelp.h> +#include <sys/osd.h> +#include <sys/queue.h> +#include <sys/refcount.h> +#include <sys/rwlock.h> +#include <sys/systm.h> + +#include <net/vnet.h> + +static struct rwlock khelp_list_lock; +RW_SYSINIT(khelplistlock, &khelp_list_lock, "helper list lock"); + +static TAILQ_HEAD(helper_head, helper) helpers = TAILQ_HEAD_INITIALIZER(helpers); + +/* Private function prototypes. */ +static inline void khelp_remove_osd(struct helper *h, struct osd *hosd); + +#define KHELP_LIST_WLOCK() rw_wlock(&khelp_list_lock) +#define KHELP_LIST_WUNLOCK() rw_wunlock(&khelp_list_lock) +#define KHELP_LIST_RLOCK() rw_rlock(&khelp_list_lock) +#define KHELP_LIST_RUNLOCK() rw_runlock(&khelp_list_lock) +#define KHELP_LIST_LOCK_ASSERT() rw_assert(&khelp_list_lock, RA_LOCKED) + +int +khelp_register_helper(struct helper *h) +{ + struct helper *tmph; + int error, i, inserted; + + error = 0; + inserted = 0; + refcount_init(&h->h_refcount, 0); + h->h_id = osd_register(OSD_KHELP, NULL, NULL); + + /* It's only safe to add the hooks after osd_register(). */ + if (h->h_nhooks > 0) { + for (i = 0; i < h->h_nhooks && !error; i++) { + /* We don't require the module to assign hook_helper. */ + h->h_hooks[i].hook_helper = h; + error = khelp_add_hhook(&h->h_hooks[i], HHOOK_NOWAIT); + } + + if (error) { + for (i--; i >= 0; i--) + khelp_remove_hhook(&h->h_hooks[i]); + + osd_deregister(OSD_KHELP, h->h_id); + } + } + + if (!error) { + KHELP_LIST_WLOCK(); + /* + * Keep list of helpers sorted in descending h_id order. Due to + * the way osd_set() works, a sorted list ensures + * init_helper_osd() will operate with improved efficiency. + */ + TAILQ_FOREACH(tmph, &helpers, h_next) { + if (tmph->h_id < h->h_id) { + TAILQ_INSERT_BEFORE(tmph, h, h_next); + inserted = 1; + break; + } + } + + if (!inserted) + TAILQ_INSERT_TAIL(&helpers, h, h_next); + KHELP_LIST_WUNLOCK(); + } + + return (error); +} + +int +khelp_deregister_helper(struct helper *h) +{ + struct helper *tmph; + int error, i; + + error = 0; + + KHELP_LIST_WLOCK(); + if (h->h_refcount > 0) + error = EBUSY; + else { + error = ENOENT; + TAILQ_FOREACH(tmph, &helpers, h_next) { + if (tmph == h) { + TAILQ_REMOVE(&helpers, h, h_next); + error = 0; + break; + } + } + } + KHELP_LIST_WUNLOCK(); + + if (!error) { + if (h->h_nhooks > 0) { + for (i = 0; i < h->h_nhooks; i++) + khelp_remove_hhook(&h->h_hooks[i]); + } + osd_deregister(OSD_KHELP, h->h_id); + } + + return (error); +} + +int +khelp_init_osd(uint32_t classes, struct osd *hosd) +{ + struct helper *h; + void *hdata; + int error; + + KASSERT(hosd != NULL, ("struct osd not initialised!")); + + error = 0; + + KHELP_LIST_RLOCK(); + TAILQ_FOREACH(h, &helpers, h_next) { + /* If helper is correct class and needs to store OSD... */ + if (h->h_classes & classes && h->h_flags & HELPER_NEEDS_OSD) { + hdata = uma_zalloc(h->h_zone, M_NOWAIT); + if (hdata == NULL) { + error = ENOMEM; + break; + } + osd_set(OSD_KHELP, hosd, h->h_id, hdata); + refcount_acquire(&h->h_refcount); + } + } + + if (error) { + /* Delete OSD that was assigned prior to the error. 
*/ + TAILQ_FOREACH(h, &helpers, h_next) { + if (h->h_classes & classes) + khelp_remove_osd(h, hosd); + } + } + KHELP_LIST_RUNLOCK(); + + return (error); +} + +int +khelp_destroy_osd(struct osd *hosd) +{ + struct helper *h; + int error; + + KASSERT(hosd != NULL, ("struct osd not initialised!")); + + error = 0; + + KHELP_LIST_RLOCK(); + /* + * Clean up all khelp related OSD. + * + * XXXLAS: Would be nice to use something like osd_exit() here but it + * doesn't have the right semantics for this purpose. + */ + TAILQ_FOREACH(h, &helpers, h_next) + khelp_remove_osd(h, hosd); + KHELP_LIST_RUNLOCK(); + + return (error); +} + +static inline void +khelp_remove_osd(struct helper *h, struct osd *hosd) +{ + void *hdata; + + if (h->h_flags & HELPER_NEEDS_OSD) { + /* + * If the current helper uses OSD and calling osd_get() + * on the helper's h_id returns non-NULL, the helper has + * OSD attached to 'hosd' which needs to be cleaned up. + */ + hdata = osd_get(OSD_KHELP, hosd, h->h_id); + if (hdata != NULL) { + uma_zfree(h->h_zone, hdata); + osd_del(OSD_KHELP, hosd, h->h_id); + refcount_release(&h->h_refcount); + } + } +} + +void * +khelp_get_osd(struct osd *hosd, int32_t id) +{ + + return (osd_get(OSD_KHELP, hosd, id)); +} + +int32_t +khelp_get_id(char *hname) +{ + struct helper *h; + int32_t id; + + id = -1; + + KHELP_LIST_RLOCK(); + TAILQ_FOREACH(h, &helpers, h_next) { + if (strncmp(h->h_name, hname, HELPER_NAME_MAXLEN) == 0) { + id = h->h_id; + break; + } + } + KHELP_LIST_RUNLOCK(); + + return (id); +} + +int +khelp_add_hhook(struct hookinfo *hki, uint32_t flags) +{ + VNET_ITERATOR_DECL(vnet_iter); + int error; + + error = 0; + + /* + * XXXLAS: If a helper is dynamically adding a helper hook function at + * runtime using this function, we should update the helper's h_hooks + * struct member to include the additional hookinfo struct. + */ + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + error = hhook_add_hook_lookup(hki, flags); + CURVNET_RESTORE(); +#ifdef VIMAGE + if (error) + break; +#endif + } + VNET_LIST_RUNLOCK_NOSLEEP(); + + return (error); +} + +int +khelp_remove_hhook(struct hookinfo *hki) +{ + VNET_ITERATOR_DECL(vnet_iter); + int error; + + error = 0; + + /* + * XXXLAS: If a helper is dynamically removing a helper hook function at + * runtime using this function, we should update the helper's h_hooks + * struct member to remove the defunct hookinfo struct. 
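+	 * (Left unimplemented for now, mirroring the note in
+	 * khelp_add_hhook() above.)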
+ */ + + VNET_LIST_RLOCK_NOSLEEP(); + VNET_FOREACH(vnet_iter) { + CURVNET_SET(vnet_iter); + error = hhook_remove_hook_lookup(hki); + CURVNET_RESTORE(); +#ifdef VIMAGE + if (error) + break; +#endif + } + VNET_LIST_RUNLOCK_NOSLEEP(); + + return (error); +} + +#ifndef __rtems__ +int +khelp_modevent(module_t mod, int event_type, void *data) +{ + struct khelp_modevent_data *kmd; + int error; + + kmd = (struct khelp_modevent_data *)data; + error = 0; + + switch(event_type) { + case MOD_LOAD: + if (kmd->helper->h_flags & HELPER_NEEDS_OSD) { + if (kmd->uma_zsize <= 0) { + printf("Use KHELP_DECLARE_MOD_UMA() instead!\n"); + error = EDOOFUS; + break; + } + kmd->helper->h_zone = uma_zcreate(kmd->name, + kmd->uma_zsize, kmd->umactor, kmd->umadtor, NULL, + NULL, 0, 0); + if (kmd->helper->h_zone == NULL) { + error = ENOMEM; + break; + } + } + strlcpy(kmd->helper->h_name, kmd->name, HELPER_NAME_MAXLEN); + kmd->helper->h_hooks = kmd->hooks; + kmd->helper->h_nhooks = kmd->nhooks; + if (kmd->helper->mod_init != NULL) + error = kmd->helper->mod_init(); + if (!error) + error = khelp_register_helper(kmd->helper); + break; + + case MOD_QUIESCE: + case MOD_SHUTDOWN: + case MOD_UNLOAD: + error = khelp_deregister_helper(kmd->helper); + if (!error) { + if (kmd->helper->h_flags & HELPER_NEEDS_OSD) + uma_zdestroy(kmd->helper->h_zone); + if (kmd->helper->mod_destroy != NULL) + kmd->helper->mod_destroy(); + } else if (error == ENOENT) + /* Do nothing and allow unload if helper not in list. */ + error = 0; + else if (error == EBUSY) + printf("Khelp module \"%s\" can't unload until its " + "refcount drops from %d to 0.\n", kmd->name, + kmd->helper->h_refcount); + break; + + default: + error = EINVAL; + break; + } + + return (error); +} +#endif /* __rtems__ */ + +/* + * This function is called in two separate situations: + * + * - When the kernel is booting, it is called directly by the SYSINIT framework + * to allow Khelp modules which were compiled into the kernel or loaded by the + * boot loader to insert their non-virtualised hook functions into the kernel. + * + * - When the kernel is booting or a vnet is created, this function is also + * called indirectly through khelp_vnet_init() by the vnet initialisation code. + * In this situation, Khelp modules are able to insert their virtualised hook + * functions into the virtualised hook points in the vnet which is being + * initialised. In the case where the kernel is not compiled with "options + * VIMAGE", this step is still run once at boot, but the hook functions get + * transparently inserted into the standard unvirtualised network stack. + */ +static void +khelp_init(const void *vnet) +{ + struct helper *h; + int error, i, vinit; + int32_t htype, hid; + + error = 0; + vinit = vnet != NULL; + + KHELP_LIST_RLOCK(); + TAILQ_FOREACH(h, &helpers, h_next) { + for (i = 0; i < h->h_nhooks && !error; i++) { + htype = h->h_hooks[i].hook_type; + hid = h->h_hooks[i].hook_id; + + /* + * If we're doing a virtualised init (vinit != 0) and + * the hook point is virtualised, or we're doing a plain + * sysinit at boot and the hook point is not + * virtualised, insert the hook. + */ + if ((hhook_head_is_virtualised_lookup(htype, hid) == + HHOOK_HEADISINVNET && vinit) || + (!hhook_head_is_virtualised_lookup(htype, hid) && + !vinit)) { + error = hhook_add_hook_lookup(&h->h_hooks[i], + HHOOK_NOWAIT); + } + } + + if (error) { + /* Remove any helper's hooks we successfully added. 
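+			 * Back out in reverse order; removing a hook that
+			 * was never added is harmless.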
*/ + for (i--; i >= 0; i--) + hhook_remove_hook_lookup(&h->h_hooks[i]); + + printf("%s: Failed to add hooks for helper \"%s\" (%p)", + __func__, h->h_name, h); + if (vinit) + printf(" to vnet %p.\n", vnet); + else + printf(".\n"); + + error = 0; + } + } + KHELP_LIST_RUNLOCK(); +} + +/* + * Vnet created and being initialised. + */ +static void +khelp_vnet_init(const void *unused __unused) +{ + + khelp_init(TD_TO_VNET(curthread)); +} + + +/* + * As the kernel boots, allow Khelp modules which were compiled into the kernel + * or loaded by the boot loader to insert their non-virtualised hook functions + * into the kernel. + */ +SYSINIT(khelp_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, khelp_init, NULL); + +/* + * When a vnet is created and being initialised, we need to insert the helper + * hook functions for all currently registered Khelp modules into the vnet's + * helper hook points. The hhook KPI provides a mechanism for subsystems which + * export helper hook points to clean up on vnet shutdown, so we don't need a + * VNET_SYSUNINIT for Khelp. + */ +VNET_SYSINIT(khelp_vnet_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, + khelp_vnet_init, NULL); diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index d2318552..90c73a70 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -71,6 +71,9 @@ int kld_debug = 0; #define KLD_LOCK() sx_xlock(&kld_sx) #define KLD_UNLOCK() sx_xunlock(&kld_sx) +#define KLD_DOWNGRADE() sx_downgrade(&kld_sx) +#define KLD_LOCK_READ() sx_slock(&kld_sx) +#define KLD_UNLOCK_READ() sx_sunlock(&kld_sx) #define KLD_LOCKED() sx_xlocked(&kld_sx) #define KLD_LOCK_ASSERT() do { \ if (!cold) \ @@ -389,7 +392,7 @@ linker_load_file(const char *filename, linker_file_t *result) { linker_class_t lc; linker_file_t lf; - int foundfile, error; + int foundfile, error, modules; /* Refuse to load modules if securelevel raised */ if (prison0.pr_securelevel > 0) @@ -428,11 +431,22 @@ linker_load_file(const char *filename, linker_file_t *result) linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (error); } + modules = !TAILQ_EMPTY(&lf->modules); KLD_UNLOCK(); linker_file_register_sysctls(lf); linker_file_sysinit(lf); KLD_LOCK(); lf->flags |= LINKER_FILE_LINKED; + + /* + * If all of the modules in this file failed + * to load, unload the file and return an + * error of ENOEXEC. + */ + if (modules && TAILQ_EMPTY(&lf->modules)) { + linker_file_unload(lf, LINKER_UNLOAD_FORCE); + return (ENOEXEC); + } *result = lf; return (0); } @@ -636,7 +650,7 @@ linker_file_unload(linker_file_t file, int flags) /* * Inform any modules associated with this file that they are - * being be unloaded. + * being unloaded. */ MOD_XLOCK; for (mod = TAILQ_FIRST(&file->modules); mod; mod = next) { @@ -647,8 +661,12 @@ linker_file_unload(linker_file_t file, int flags) * Give the module a chance to veto the unload. 
*/ if ((error = module_unload(mod)) != 0) { +#ifdef KLD_DEBUG + MOD_SLOCK; KLD_DPF(FILE, ("linker_file_unload: module %s" - " failed unload\n", mod)); + " failed unload\n", module_getname(mod))); + MOD_SUNLOCK; +#endif return (error); } MOD_XLOCK; @@ -1030,18 +1048,24 @@ kern_kldload(struct thread *td, const char *file, int *fileid) KLD_LOCK(); error = linker_load_module(kldname, modname, NULL, NULL, &lf); - if (error) - goto unlock; + if (error) { + KLD_UNLOCK(); + goto done; + } + lf->userrefs++; + if (fileid != NULL) + *fileid = lf->id; #ifdef HWPMC_HOOKS + KLD_DOWNGRADE(); pkm.pm_file = lf->filename; pkm.pm_address = (uintptr_t) lf->address; PMC_CALL_HOOK(td, PMC_FN_KLD_LOAD, (void *) &pkm); -#endif - lf->userrefs++; - if (fileid != NULL) - *fileid = lf->id; -unlock: + KLD_UNLOCK_READ(); +#else KLD_UNLOCK(); +#endif + +done: CURVNET_RESTORE(); return (error); } @@ -1113,10 +1137,15 @@ kern_kldunload(struct thread *td, int fileid, int flags) error = ENOENT; #ifdef HWPMC_HOOKS - if (error == 0) + if (error == 0) { + KLD_DOWNGRADE(); PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm); -#endif + KLD_UNLOCK_READ(); + } else + KLD_UNLOCK(); +#else KLD_UNLOCK(); +#endif CURVNET_RESTORE(); return (error); } @@ -1212,29 +1241,39 @@ int kldstat(struct thread *td, struct kldstat_args *uap) { struct kld_file_stat stat; - linker_file_t lf; - int error, namelen, version, version_num; + int error, version; /* * Check the version of the user's structure. */ - if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0) + if ((error = copyin(&uap->stat->version, &version, sizeof(version))) + != 0) return (error); - if (version == sizeof(struct kld_file_stat_1)) - version_num = 1; - else if (version == sizeof(struct kld_file_stat)) - version_num = 2; - else + if (version != sizeof(struct kld_file_stat_1) && + version != sizeof(struct kld_file_stat)) return (EINVAL); + error = kern_kldstat(td, uap->fileid, &stat); + if (error != 0) + return (error); + return (copyout(&stat, uap->stat, version)); +} + +int +kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) +{ + linker_file_t lf; + int namelen; #ifdef MAC + int error; + error = mac_kld_check_stat(td->td_ucred); if (error) return (error); #endif KLD_LOCK(); - lf = linker_find_file_by_id(uap->fileid); + lf = linker_find_file_by_id(fileid); if (lf == NULL) { KLD_UNLOCK(); return (ENOENT); @@ -1244,23 +1283,20 @@ kldstat(struct thread *td, struct kldstat_args *uap) namelen = strlen(lf->filename) + 1; if (namelen > MAXPATHLEN) namelen = MAXPATHLEN; - bcopy(lf->filename, &stat.name[0], namelen); - stat.refs = lf->refs; - stat.id = lf->id; - stat.address = lf->address; - stat.size = lf->size; - if (version_num > 1) { - /* Version 2 fields: */ - namelen = strlen(lf->pathname) + 1; - if (namelen > MAXPATHLEN) - namelen = MAXPATHLEN; - bcopy(lf->pathname, &stat.pathname[0], namelen); - } + bcopy(lf->filename, &stat->name[0], namelen); + stat->refs = lf->refs; + stat->id = lf->id; + stat->address = lf->address; + stat->size = lf->size; + /* Version 2 fields: */ + namelen = strlen(lf->pathname) + 1; + if (namelen > MAXPATHLEN) + namelen = MAXPATHLEN; + bcopy(lf->pathname, &stat->pathname[0], namelen); KLD_UNLOCK(); td->td_retval[0] = 0; - - return (copyout(&stat, uap->stat, version)); + return (0); } int @@ -1928,7 +1964,7 @@ linker_hwpmc_list_objects(void) int i, nmappings; nmappings = 0; - KLD_LOCK(); + KLD_LOCK_READ(); TAILQ_FOREACH(lf, &linker_files, link) nmappings++; @@ -1943,7 +1979,7 @@ linker_hwpmc_list_objects(void) 
kobase[i].pm_address = (uintptr_t)lf->address; i++; } - KLD_UNLOCK(); + KLD_UNLOCK_READ(); KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?")); diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c index 25058a79..c513463d 100644 --- a/freebsd/sys/kern/kern_mib.c +++ b/freebsd/sys/kern/kern_mib.c @@ -108,6 +108,9 @@ SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE, version, 0, "Kernel version"); +SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE, + compiler_version, 0, "Version of compiler used to compile kernel"); + SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE, ostype, 0, "Operating system type"); diff --git a/freebsd/sys/kern/kern_osd.c b/freebsd/sys/kern/kern_osd.c new file mode 100644 index 00000000..167607e0 --- /dev/null +++ b/freebsd/sys/kern/kern_osd.c @@ -0,0 +1,405 @@ +#include <machine/rtems-bsd-config.h> + +/*- + * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/kernel.h> +#include <sys/systm.h> +#include <sys/sysctl.h> +#include <rtems/bsd/sys/errno.h> +#include <sys/jail.h> +#include <sys/malloc.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mutex.h> +#include <sys/rmlock.h> +#include <sys/sx.h> +#include <sys/queue.h> +#include <sys/proc.h> +#include <sys/osd.h> + +/* OSD (Object Specific Data) */ + +static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data"); + +static int osd_debug = 0; +TUNABLE_INT("debug.osd", &osd_debug); +SYSCTL_INT(_debug, OID_AUTO, osd, CTLFLAG_RW, &osd_debug, 0, "OSD debug level"); + +#define OSD_DEBUG(...) do { \ + if (osd_debug) { \ + printf("OSD (%s:%u): ", __func__, __LINE__); \ + printf(__VA_ARGS__); \ + printf("\n"); \ + } \ +} while (0) + +static void do_osd_del(u_int type, struct osd *osd, u_int slot, + int list_locked); + +/* + * Lists of objects with OSD. 
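+ * An object is linked onto the list for its type when its first slot is
+ * set and unlinked again when its last slot is deleted.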
+ * + * Lock key: + * (m) osd_module_lock + * (o) osd_object_lock + * (l) osd_list_lock + */ +static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */ +static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */ +static u_int osd_nslots[OSD_LAST + 1]; /* (m) */ +static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */ +static const u_int osd_nmethods[OSD_LAST + 1] = { + [OSD_JAIL] = PR_MAXMETHOD, +}; + +static struct sx osd_module_lock[OSD_LAST + 1]; +static struct rmlock osd_object_lock[OSD_LAST + 1]; +static struct mtx osd_list_lock[OSD_LAST + 1]; + +static void +osd_default_destructor(void *value __unused) +{ + /* Do nothing. */ +} + +int +osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods) +{ + void *newptr; + u_int i, m; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + + /* + * If no destructor is given, use default one. We need to use some + * destructor, because NULL destructor means unused slot. + */ + if (destructor == NULL) + destructor = osd_default_destructor; + + sx_xlock(&osd_module_lock[type]); + /* + * First, we try to find unused slot. + */ + for (i = 0; i < osd_nslots[type]; i++) { + if (osd_destructors[type][i] == NULL) { + OSD_DEBUG("Unused slot found (type=%u, slot=%u).", + type, i); + break; + } + } + /* + * If no unused slot was found, allocate one. + */ + if (i == osd_nslots[type]) { + osd_nslots[type]++; + if (osd_nmethods[type] != 0) + osd_methods[type] = realloc(osd_methods[type], + sizeof(osd_method_t) * osd_nslots[type] * + osd_nmethods[type], M_OSD, M_WAITOK); + newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type], + M_OSD, M_WAITOK); + rm_wlock(&osd_object_lock[type]); + bcopy(osd_destructors[type], newptr, + sizeof(osd_destructor_t) * i); + free(osd_destructors[type], M_OSD); + osd_destructors[type] = newptr; + rm_wunlock(&osd_object_lock[type]); + OSD_DEBUG("New slot allocated (type=%u, slot=%u).", + type, i + 1); + } + + osd_destructors[type][i] = destructor; + if (osd_nmethods[type] != 0) { + for (m = 0; m < osd_nmethods[type]; m++) + osd_methods[type][i * osd_nmethods[type] + m] = + methods != NULL ? methods[m] : NULL; + } + sx_xunlock(&osd_module_lock[type]); + return (i + 1); +} + +void +osd_deregister(u_int type, u_int slot) +{ + struct osd *osd, *tosd; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + KASSERT(slot > 0, ("Invalid slot.")); + KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + + sx_xlock(&osd_module_lock[type]); + rm_wlock(&osd_object_lock[type]); + /* + * Free all OSD for the given slot. + */ + mtx_lock(&osd_list_lock[type]); + LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd) + do_osd_del(type, osd, slot, 1); + mtx_unlock(&osd_list_lock[type]); + /* + * Set destructor to NULL to free the slot. + */ + osd_destructors[type][slot - 1] = NULL; + if (slot == osd_nslots[type]) { + osd_nslots[type]--; + osd_destructors[type] = realloc(osd_destructors[type], + sizeof(osd_destructor_t) * osd_nslots[type], M_OSD, + M_NOWAIT | M_ZERO); + if (osd_nmethods[type] != 0) + osd_methods[type] = realloc(osd_methods[type], + sizeof(osd_method_t) * osd_nslots[type] * + osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO); + /* + * We always reallocate to smaller size, so we assume it will + * always succeed. 
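+		 * The KASSERT below documents that assumption.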
+ */ + KASSERT(osd_destructors[type] != NULL && + (osd_nmethods[type] == 0 || osd_methods[type] != NULL), + ("realloc() failed")); + OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).", + type, slot); + } else { + OSD_DEBUG("Slot deregistration (type=%u, slot=%u).", + type, slot); + } + rm_wunlock(&osd_object_lock[type]); + sx_xunlock(&osd_module_lock[type]); +} + +int +osd_set(u_int type, struct osd *osd, u_int slot, void *value) +{ + struct rm_priotracker tracker; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + KASSERT(slot > 0, ("Invalid slot.")); + KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + + rm_rlock(&osd_object_lock[type], &tracker); + if (slot > osd->osd_nslots) { + if (value == NULL) { + OSD_DEBUG( + "Not allocating null slot (type=%u, slot=%u).", + type, slot); + rm_runlock(&osd_object_lock[type], &tracker); + return (0); + } else if (osd->osd_nslots == 0) { + /* + * First OSD for this object, so we need to allocate + * space and put it onto the list. + */ + osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD, + M_NOWAIT | M_ZERO); + if (osd->osd_slots == NULL) { + rm_runlock(&osd_object_lock[type], &tracker); + return (ENOMEM); + } + osd->osd_nslots = slot; + mtx_lock(&osd_list_lock[type]); + LIST_INSERT_HEAD(&osd_list[type], osd, osd_next); + mtx_unlock(&osd_list_lock[type]); + OSD_DEBUG("Setting first slot (type=%u).", type); + } else { + void *newptr; + + /* + * Too few slots allocated here, needs to extend + * the array. + */ + newptr = realloc(osd->osd_slots, sizeof(void *) * slot, + M_OSD, M_NOWAIT | M_ZERO); + if (newptr == NULL) { + rm_runlock(&osd_object_lock[type], &tracker); + return (ENOMEM); + } + osd->osd_slots = newptr; + osd->osd_nslots = slot; + OSD_DEBUG("Growing slots array (type=%u).", type); + } + } + OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type, + slot, value); + osd->osd_slots[slot - 1] = value; + rm_runlock(&osd_object_lock[type], &tracker); + return (0); +} + +void * +osd_get(u_int type, struct osd *osd, u_int slot) +{ + struct rm_priotracker tracker; + void *value; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + KASSERT(slot > 0, ("Invalid slot.")); + KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + + rm_rlock(&osd_object_lock[type], &tracker); + if (slot > osd->osd_nslots) { + value = NULL; + OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot); + } else { + value = osd->osd_slots[slot - 1]; + OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).", + type, slot, value); + } + rm_runlock(&osd_object_lock[type], &tracker); + return (value); +} + +void +osd_del(u_int type, struct osd *osd, u_int slot) +{ + struct rm_priotracker tracker; + + rm_rlock(&osd_object_lock[type], &tracker); + do_osd_del(type, osd, slot, 0); + rm_runlock(&osd_object_lock[type], &tracker); +} + +static void +do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked) +{ + int i; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + KASSERT(slot > 0, ("Invalid slot.")); + KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot.")); + + OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot); + + if (slot > osd->osd_nslots) { + OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot); + return; + } + if (osd->osd_slots[slot - 1] != NULL) { + osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]); + osd->osd_slots[slot - 1] = NULL; + } + for (i = osd->osd_nslots - 1; i >= 0; i--) { + if 
(osd->osd_slots[i] != NULL) { + OSD_DEBUG("Slot still has a value (type=%u, slot=%u).", + type, i + 1); + break; + } + } + if (i == -1) { + /* No values left for this object. */ + OSD_DEBUG("No more slots left (type=%u).", type); + if (!list_locked) + mtx_lock(&osd_list_lock[type]); + LIST_REMOVE(osd, osd_next); + if (!list_locked) + mtx_unlock(&osd_list_lock[type]); + free(osd->osd_slots, M_OSD); + osd->osd_slots = NULL; + osd->osd_nslots = 0; + } else if (slot == osd->osd_nslots) { + /* This was the last slot. */ + osd->osd_slots = realloc(osd->osd_slots, + sizeof(void *) * (i + 1), M_OSD, M_NOWAIT | M_ZERO); + /* + * We always reallocate to smaller size, so we assume it will + * always succeed. + */ + KASSERT(osd->osd_slots != NULL, ("realloc() failed")); + osd->osd_nslots = i + 1; + OSD_DEBUG("Reducing slots array to %u (type=%u).", + osd->osd_nslots, type); + } +} + +int +osd_call(u_int type, u_int method, void *obj, void *data) +{ + osd_method_t methodfun; + int error, i; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + KASSERT(method < osd_nmethods[type], ("Invalid method.")); + + /* + * Call this method for every slot that defines it, stopping if an + * error is encountered. + */ + error = 0; + sx_slock(&osd_module_lock[type]); + for (i = 0; i < osd_nslots[type]; i++) { + methodfun = + osd_methods[type][i * osd_nmethods[type] + method]; + if (methodfun != NULL && (error = methodfun(obj, data)) != 0) + break; + } + sx_sunlock(&osd_module_lock[type]); + return (error); +} + +void +osd_exit(u_int type, struct osd *osd) +{ + struct rm_priotracker tracker; + u_int i; + + KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type.")); + + if (osd->osd_nslots == 0) { + KASSERT(osd->osd_slots == NULL, ("Non-null osd_slots.")); + /* No OSD attached, just leave. 
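+		 * (osd_set() was never called for this object, so there are
+		 * no destructors to run.)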
*/ + return; + } + + rm_rlock(&osd_object_lock[type], &tracker); + for (i = 1; i <= osd->osd_nslots; i++) { + if (osd_destructors[type][i - 1] != NULL) + do_osd_del(type, osd, i, 0); + else + OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i); + } + rm_runlock(&osd_object_lock[type], &tracker); + OSD_DEBUG("Object exit (type=%u).", type); +} + +static void +osd_init(void *arg __unused) +{ + u_int i; + + for (i = OSD_FIRST; i <= OSD_LAST; i++) { + osd_nslots[i] = 0; + LIST_INIT(&osd_list[i]); + sx_init(&osd_module_lock[i], "osd_module"); + rm_init(&osd_object_lock[i], "osd_object"); + mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF); + osd_destructors[i] = NULL; + osd_methods[i] = NULL; + } +} +SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL); diff --git a/freebsd/sys/kern/kern_subr.c b/freebsd/sys/kern/kern_subr.c index ec671200..0cbc75b9 100644 --- a/freebsd/sys/kern/kern_subr.c +++ b/freebsd/sys/kern/kern_subr.c @@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$"); #include <sys/ktr.h> #include <sys/limits.h> #include <rtems/bsd/sys/lock.h> +#include <sys/mman.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/malloc.h> @@ -56,6 +57,7 @@ __FBSDID("$FreeBSD$"); #include <sys/vnode.h> #include <vm/vm.h> +#include <vm/vm_extern.h> #include <vm/vm_page.h> #include <vm/vm_map.h> #include <sys/uio.h> @@ -67,7 +69,11 @@ __FBSDID("$FreeBSD$"); #ifndef __rtems__ SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); +#endif /* __rtems__ */ + +static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault); +#ifndef __rtems__ #ifdef ZERO_COPY_SOCKETS /* Declared in uipc_socket.c */ extern int so_zero_copy_receive; @@ -132,31 +138,75 @@ retry: return(KERN_SUCCESS); } #endif /* ZERO_COPY_SOCKETS */ + +int +copyin_nofault(const void *udaddr, void *kaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyin(udaddr, kaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} + +int +copyout_nofault(const void *kaddr, void *udaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyout(kaddr, udaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} #endif /* __rtems__ */ int uiomove(void *cp, int n, struct uio *uio) { + + return (uiomove_faultflag(cp, n, uio, 0)); +} + +int +uiomove_nofault(void *cp, int n, struct uio *uio) +{ + + return (uiomove_faultflag(cp, n, uio, 1)); +} + +static int +uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault) +{ #ifndef __rtems__ - struct thread *td = curthread; + struct thread *td; #endif /* __rtems__ */ struct iovec *iov; u_int cnt; - int error = 0; -#ifndef __rtems__ - int save = 0; -#endif /* __rtems__ */ + int error, newflags, save; KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, ("uiomove: mode")); - KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, ("uiomove proc")); - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Calling uiomove()"); + if (!nofault) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Calling uiomove()"); #ifndef __rtems__ - save = td->td_pflags & TDP_DEADLKTREAT; - td->td_pflags |= TDP_DEADLKTREAT; + /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */ + newflags = TDP_DEADLKTREAT; + if (uio->uio_segflg == UIO_USERSPACE && nofault) { + /* + * Fail if a non-spurious page fault occurs. 
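+		 * (A spurious fault, where the mapping turns out to be valid
+		 * after all, is tolerated; any real fault makes the copy
+		 * fail.)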
+ */ + newflags |= TDP_NOFAULTING | TDP_RESETSPUR; + } + save = curthread_pflags_set(newflags); +#else /* __rtems__ */ + (void) newflags; + (void) save; #endif /* __rtems__ */ while (n > 0 && uio->uio_resid) { @@ -203,8 +253,7 @@ uiomove(void *cp, int n, struct uio *uio) } out: #ifndef __rtems__ - if (save == 0) - td->td_pflags &= ~TDP_DEADLKTREAT; + curthread_pflags_restore(save); #endif /* __rtems__ */ return (error); } @@ -388,9 +437,7 @@ hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask, LIST_HEAD(generic, generic) *hashtbl; int i; - if (elements <= 0) - panic("hashinit: bad elements"); - + KASSERT(elements > 0, ("%s: bad elements", __func__)); /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */ KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT), ("Bad flags (0x%x) passed to hashinit_flags", flags)); @@ -431,8 +478,7 @@ hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) hashtbl = vhashtbl; for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) - if (!LIST_EMPTY(hp)) - panic("hashdestroy: hash not empty"); + KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__)); free(hashtbl, type); } @@ -451,8 +497,7 @@ phashinit(int elements, struct malloc_type *type, u_long *nentries) LIST_HEAD(generic, generic) *hashtbl; int i; - if (elements <= 0) - panic("phashinit: bad elements"); + KASSERT(elements > 0, ("%s: bad elements", __func__)); for (i = 1, hashsize = primes[1]; hashsize <= elements;) { i++; if (i == NPRIMES) @@ -471,16 +516,8 @@ phashinit(int elements, struct malloc_type *type, u_long *nentries) void uio_yield(void) { - struct thread *td; - td = curthread; - DROP_GIANT(); - thread_lock(td); - sched_prio(td, td->td_user_pri); - mi_switch(SW_INVOL | SWT_RELINQUISH, NULL); - thread_unlock(td); - rtems_task_wake_after(RTEMS_YIELD_PROCESSOR); - PICKUP_GIANT(); + kern_yield(PRI_USER); } int @@ -591,4 +628,55 @@ cloneuio(struct uio *uiop) bcopy(uiop->uio_iov, uio->uio_iov, iovlen); return (uio); } + +/* + * Map some anonymous memory in user space of size sz, rounded up to the page + * boundary. + */ +int +copyout_map(struct thread *td, vm_offset_t *addr, size_t sz) +{ + struct vmspace *vms; + int error; + vm_size_t size; + + vms = td->td_proc->p_vmspace; + + /* + * Map somewhere after heap in process memory. + */ + PROC_LOCK(td->td_proc); + *addr = round_page((vm_offset_t)vms->vm_daddr + + lim_max(td->td_proc, RLIMIT_DATA)); + PROC_UNLOCK(td->td_proc); + + /* round size up to page boundry */ + size = (vm_size_t)round_page(sz); + + error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE, + VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0); + + return (error); +} + +/* + * Unmap memory in user space. 
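+ * The size is rounded up to a page boundary, mirroring copyout_map()
+ * above.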
+ */ +int +copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz) +{ + vm_map_t map; + vm_size_t size; + + if (sz == 0) + return (0); + + map = &td->td_proc->p_vmspace->vm_map; + size = (vm_size_t)round_page(sz); + + if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS) + return (EINVAL); + + return (0); +} #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 750de376..fad7ec10 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_ktrace.h> #include <rtems/bsd/sys/param.h> +#include <sys/fail.h> #include <sys/systm.h> #include <sys/kernel.h> #include <sys/sysctl.h> @@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$"); #include <sys/jail.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> +#include <sys/sbuf.h> #include <sys/sx.h> #include <sys/sysproto.h> #include <sys/uio.h> @@ -87,13 +89,12 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer"); static struct sx sysctllock; static struct sx sysctlmemlock; -#define SYSCTL_SLOCK() sx_slock(&sysctllock) -#define SYSCTL_SUNLOCK() sx_sunlock(&sysctllock) #define SYSCTL_XLOCK() sx_xlock(&sysctllock) #define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock) #define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED) -#define SYSCTL_ASSERT_LOCKED() sx_assert(&sysctllock, SA_LOCKED) #define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock") +#define SYSCTL_SLEEP(ch, wmesg, timo) \ + sx_sleep(ch, &sysctllock, 0, wmesg, timo) static int sysctl_root(SYSCTL_HANDLER_ARGS); @@ -107,7 +108,7 @@ sysctl_find_oidname(const char *name, struct sysctl_oid_list *list) { struct sysctl_oid *oidp; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_ASSERT_XLOCKED(); SLIST_FOREACH(oidp, list, oid_link) { if (strcmp(oidp->oid_name, name) == 0) { return (oidp); @@ -314,7 +315,7 @@ sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp) { struct sysctl_ctx_entry *e; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_ASSERT_XLOCKED(); if (clist == NULL || oidp == NULL) return(NULL); TAILQ_FOREACH(e, clist, link) { @@ -410,10 +411,20 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) } sysctl_unregister_oid(oidp); if (del) { + /* + * Wait for all threads running the handler to drain. + * This preserves the previous behavior when the + * sysctl lock was held across a handler invocation, + * and is necessary for module unload correctness. + */ + while (oidp->oid_running > 0) { + oidp->oid_kind |= CTLFLAG_DYING; + SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0); + } if (oidp->oid_descr) - free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID); - free((void *)(uintptr_t)(const void *)oidp->oid_name, - M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_descr), + M_SYSCTLOID); + free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } } @@ -430,8 +441,6 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) { struct sysctl_oid *oidp; - ssize_t len; - char *newname; /* You have to hook up somewhere.. 
*/ if (parent == NULL) @@ -458,11 +467,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, SLIST_NEXT(oidp, oid_link) = NULL; oidp->oid_number = number; oidp->oid_refcnt = 1; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; - oidp->oid_name = newname; + oidp->oid_name = strdup(name, M_SYSCTLOID); oidp->oid_handler = handler; oidp->oid_kind = CTLFLAG_DYN | kind; if ((kind & CTLTYPE) == CTLTYPE_NODE) { @@ -475,12 +480,8 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, oidp->oid_arg2 = arg2; } oidp->oid_fmt = fmt; - if (descr) { - int len = strlen(descr) + 1; - oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK); - if (oidp->oid_descr) - strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr); - } + if (descr) + oidp->oid_descr = strdup(descr, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); @@ -496,16 +497,12 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, void sysctl_rename_oid(struct sysctl_oid *oidp, const char *name) { - ssize_t len; char *newname; - void *oldname; + char *oldname; - len = strlen(name); - newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK); - bcopy(name, newname, len + 1); - newname[len] = '\0'; + newname = strdup(name, M_SYSCTLOID); SYSCTL_XLOCK(); - oldname = (void *)(uintptr_t)(const void *)oidp->oid_name; + oldname = __DECONST(char *, oidp->oid_name); oidp->oid_name = newname; SYSCTL_XUNLOCK(); free(oldname, M_SYSCTLOID); @@ -582,7 +579,7 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) int k; struct sysctl_oid *oidp; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_ASSERT_XLOCKED(); SLIST_FOREACH(oidp, l, oid_link) { for (k=0; k<i; k++) @@ -623,7 +620,9 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS) error = priv_check(req->td, PRIV_SYSCTL_DEBUG); if (error) return (error); + SYSCTL_XLOCK(); sysctl_sysctl_debug_dump_node(&sysctl__children, 0); + SYSCTL_XUNLOCK(); return (ENOENT); } @@ -641,7 +640,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) struct sysctl_oid_list *lsp = &sysctl__children, *lsp2; char buf[10]; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_XLOCK(); while (namelen) { if (!lsp) { snprintf(buf,sizeof(buf),"%d",*name); @@ -650,7 +649,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) if (!error) error = SYSCTL_OUT(req, buf, strlen(buf)); if (error) - return (error); + goto out; namelen--; name++; continue; @@ -666,7 +665,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) error = SYSCTL_OUT(req, oid->oid_name, strlen(oid->oid_name)); if (error) - return (error); + goto out; namelen--; name++; @@ -677,12 +676,15 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) if (oid->oid_handler) break; - lsp2 = (struct sysctl_oid_list *)oid->oid_arg1; + lsp2 = SYSCTL_CHILDREN(oid); break; } lsp = lsp2; } - return (SYSCTL_OUT(req, "", 1)); + error = SYSCTL_OUT(req, "", 1); + out: + SYSCTL_XUNLOCK(); + return (error); } static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, ""); @@ -693,7 +695,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, { struct sysctl_oid *oidp; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_ASSERT_XLOCKED(); *len = level; SLIST_FOREACH(oidp, lsp, oid_link) { *next = oidp->oid_number; @@ -708,7 +710,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, if (oidp->oid_handler) /* We really should call the handler here...*/ return (0); - lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + 
lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1, len, level+1, oidpp)) return (0); @@ -723,7 +725,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, return (0); if (oidp->oid_handler) return (0); - lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); @@ -735,7 +737,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, if (oidp->oid_handler) continue; - lsp = (struct sysctl_oid_list *)oidp->oid_arg1; + lsp = SYSCTL_CHILDREN(oidp); if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1, len, level+1, oidpp)) return (0); @@ -757,7 +759,9 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) struct sysctl_oid_list *lsp = &sysctl__children; int newoid[CTL_MAXNAME]; + SYSCTL_XLOCK(); i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid); + SYSCTL_XUNLOCK(); if (i) return (ENOENT); error = SYSCTL_OUT(req, newoid, j * sizeof (int)); @@ -769,39 +773,26 @@ static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) { - int i; struct sysctl_oid *oidp; struct sysctl_oid_list *lsp = &sysctl__children; char *p; - SYSCTL_ASSERT_LOCKED(); - - if (!*name) - return (ENOENT); - - p = name + strlen(name) - 1 ; - if (*p == '.') - *p = '\0'; - - *len = 0; - - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; + SYSCTL_ASSERT_XLOCKED(); - oidp = SLIST_FIRST(lsp); + for (*len = 0; *len < CTL_MAXNAME;) { + p = strsep(&name, "."); - while (oidp && *len < CTL_MAXNAME) { - if (strcmp(name, oidp->oid_name)) { - oidp = SLIST_NEXT(oidp, oid_link); - continue; + oidp = SLIST_FIRST(lsp); + for (;; oidp = SLIST_NEXT(oidp, oid_link)) { + if (oidp == NULL) + return (ENOENT); + if (strcmp(p, oidp->oid_name) == 0) + break; } *oid++ = oidp->oid_number; (*len)++; - if (!i) { + if (name == NULL || *name == '\0') { if (oidpp) *oidpp = oidp; return (0); @@ -813,14 +804,7 @@ name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) if (oidp->oid_handler) break; - lsp = (struct sysctl_oid_list *)oidp->oid_arg1; - oidp = SLIST_FIRST(lsp); - name = p+1; - for (p = name; *p && *p != '.'; p++) - ; - i = *p; - if (i == '.') - *p = '\0'; + lsp = SYSCTL_CHILDREN(oidp); } return (ENOENT); } @@ -832,8 +816,6 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) int error, oid[CTL_MAXNAME], len; struct sysctl_oid *op = 0; - SYSCTL_ASSERT_LOCKED(); - if (!req->newlen) return (ENOENT); if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */ @@ -848,8 +830,10 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) } p [req->newlen] = '\0'; - + len = 0; + SYSCTL_XLOCK(); error = name2oid(p, oid, &len, &op); + SYSCTL_XUNLOCK(); free(p, M_SYSCTL); @@ -869,16 +853,21 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) struct sysctl_oid *oid; int error; + SYSCTL_XLOCK(); error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); if (error) - return (error); + goto out; - if (!oid->oid_fmt) - return (ENOENT); + if (oid->oid_fmt == NULL) { + error = ENOENT; + goto out; + } error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind)); if (error) - return (error); + goto out; error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1); + out: + SYSCTL_XUNLOCK(); return (error); } @@ -892,13 +881,18 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) struct sysctl_oid *oid; int error; + SYSCTL_XLOCK(); error = sysctl_find_oid(arg1, arg2, 
&oid, NULL, req); if (error) - return (error); + goto out; - if (!oid->oid_descr) - return (ENOENT); + if (oid->oid_descr == NULL) { + error = ENOENT; + goto out; + } error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1); + out: + SYSCTL_XUNLOCK(); return (error); } @@ -966,7 +960,10 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS) /* - * Handle a long, signed or unsigned. arg1 points to it. + * Handle a long, signed or unsigned. + * Two cases: + * a variable: point arg1 at it. + * a constant: pass it in arg2. */ int @@ -981,9 +978,10 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS) /* * Attempt to get a coherent snapshot by making a copy of the data. */ - if (!arg1) - return (EINVAL); - tmplong = *(long *)arg1; + if (arg1) + tmplong = *(long *)arg1; + else + tmplong = arg2; #ifdef SCTL_MASK32 if (req->flags & SCTL_MASK32) { tmpint = tmplong; @@ -995,18 +993,24 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS) if (error || !req->newptr) return (error); + if (!arg1) + error = EPERM; #ifdef SCTL_MASK32 - if (req->flags & SCTL_MASK32) { + else if (req->flags & SCTL_MASK32) { error = SYSCTL_IN(req, &tmpint, sizeof(int)); *(long *)arg1 = (long)tmpint; - } else + } #endif + else error = SYSCTL_IN(req, arg1, sizeof(long)); return (error); } /* - * Handle a 64 bit int, signed or unsigned. arg1 points to it. + * Handle a 64 bit int, signed or unsigned. + * Two cases: + * a variable: point arg1 at it. + * a constant: pass it in arg2. */ int @@ -1018,15 +1022,19 @@ sysctl_handle_quad(SYSCTL_HANDLER_ARGS) /* * Attempt to get a coherent snapshot by making a copy of the data. */ - if (!arg1) - return (EINVAL); - tmpout = *(uint64_t *)arg1; + if (arg1) + tmpout = *(uint64_t *)arg1; + else + tmpout = arg2; error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t)); if (error || !req->newptr) return (error); - error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); + if (!arg1) + error = EPERM; + else + error = SYSCTL_IN(req, arg1, sizeof(uint64_t)); return (error); } @@ -1192,9 +1200,9 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, req.newfunc = sysctl_new_kernel; req.lock = REQ_LOCKED; - SYSCTL_SLOCK(); + SYSCTL_XLOCK(); error = sysctl_root(0, name, namelen, &req); - SYSCTL_SUNLOCK(); + SYSCTL_XUNLOCK(); if (req.lock == REQ_WIRED && req.validlen > 0) vsunlock(req.oldptr, req.validlen); @@ -1241,8 +1249,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp, static int sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) { - int error = 0; size_t i, len, origidx; + int error; origidx = req->oldidx; req->oldidx += l; @@ -1263,10 +1271,14 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l) else { if (i > len - origidx) i = len - origidx; - error = copyout(p, (char *)req->oldptr + origidx, i); + if (req->lock == REQ_WIRED) { + error = copyout_nofault(p, (char *)req->oldptr + + origidx, i); + } else + error = copyout(p, (char *)req->oldptr + origidx, i); + if (error != 0) + return (error); } - if (error) - return (error); if (i < l) return (ENOMEM); return (0); @@ -1322,37 +1334,43 @@ int sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, int *nindx, struct sysctl_req *req) { + struct sysctl_oid_list *lsp; struct sysctl_oid *oid; int indx; - SYSCTL_ASSERT_LOCKED(); - oid = SLIST_FIRST(&sysctl__children); + SYSCTL_ASSERT_XLOCKED(); + lsp = &sysctl__children; indx = 0; - while (oid && indx < CTL_MAXNAME) { - if (oid->oid_number == name[indx]) { - indx++; - if (oid->oid_kind & CTLFLAG_NOLOCK) - req->lock = REQ_UNLOCKED; - if 
((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { - if (oid->oid_handler != NULL || - indx == namelen) { - *noid = oid; - if (nindx != NULL) - *nindx = indx; - return (0); - } - oid = SLIST_FIRST( - (struct sysctl_oid_list *)oid->oid_arg1); - } else if (indx == namelen) { + while (indx < CTL_MAXNAME) { + SLIST_FOREACH(oid, lsp, oid_link) { + if (oid->oid_number == name[indx]) + break; + } + if (oid == NULL) + return (ENOENT); + + indx++; + if (oid->oid_kind & CTLFLAG_NOLOCK) + req->lock = REQ_UNLOCKED; + if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { + if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; if (nindx != NULL) *nindx = indx; + KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, + ("%s found DYING node %p", __func__, oid)); return (0); - } else { - return (ENOTDIR); } + lsp = SYSCTL_CHILDREN(oid); + } else if (indx == namelen) { + *noid = oid; + if (nindx != NULL) + *nindx = indx; + KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0, + ("%s found DYING node %p", __func__, oid)); + return (0); } else { - oid = SLIST_NEXT(oid, oid_link); + return (ENOTDIR); } } return (ENOENT); @@ -1369,7 +1387,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS) struct sysctl_oid *oid; int error, indx, lvl; - SYSCTL_ASSERT_LOCKED(); + SYSCTL_ASSERT_XLOCKED(); error = sysctl_find_oid(arg1, arg2, &oid, &indx, req); if (error) @@ -1437,12 +1455,23 @@ sysctl_root(SYSCTL_HANDLER_ARGS) if (error != 0) return (error); #endif + oid->oid_running++; + SYSCTL_XUNLOCK(); + if (!(oid->oid_kind & CTLFLAG_MPSAFE)) mtx_lock(&Giant); error = oid->oid_handler(oid, arg1, arg2, req); if (!(oid->oid_kind & CTLFLAG_MPSAFE)) mtx_unlock(&Giant); +#ifndef __rtems__ + KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error); +#endif /* __rtems__ */ + + SYSCTL_XLOCK(); + oid->oid_running--; + if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0) + wakeup(&oid->oid_running); return (error); } @@ -1543,9 +1572,9 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, for (;;) { req.oldidx = 0; req.newidx = 0; - SYSCTL_SLOCK(); + SYSCTL_XLOCK(); error = sysctl_root(0, name, namelen, &req); - SYSCTL_SUNLOCK(); + SYSCTL_XUNLOCK(); if (error != EAGAIN) break; uio_yield(); @@ -1569,4 +1598,29 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, } return (error); } + +/* + * Drain into a sysctl struct. The user buffer should be wired if a page + * fault would cause issue. + */ +static int +sbuf_sysctl_drain(void *arg, const char *data, int len) +{ + struct sysctl_req *req = arg; + int error; + + error = SYSCTL_OUT(req, data, len); + KASSERT(error >= 0, ("Got unexpected negative value %d", error)); + return (error == 0 ? 
len : -error);
+}
+
+struct sbuf *
+sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
+    struct sysctl_req *req)
+{
+
+	s = sbuf_new(s, buf, length, SBUF_FIXEDLEN);
+	sbuf_set_drain(s, sbuf_sysctl_drain, req);
+	return (s);
+}
 #endif /* __rtems__ */
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index a7fe5d59..73cbd571 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -58,14 +58,18 @@ __FBSDID("$FreeBSD$");
 #include <sys/sysctl.h>
 #include <sys/smp.h>
 
+#ifdef SMP
+#include <machine/cpu.h>
+#endif
+
 #ifdef __rtems__
 int ncallout = 16;
 #endif /* __rtems__ */
 
 SDT_PROVIDER_DEFINE(callout_execute);
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start);
+SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
     "struct callout *");
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end);
+SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end);
 SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
     "struct callout *");
 
@@ -88,6 +92,21 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
 int callwheelsize, callwheelbits, callwheelmask;
 
 /*
+ * The callout cpu migration entity represents the information necessary
+ * for describing the migrating callout to the new callout cpu.
+ * The cached information is very important for deferring migration when
+ * the migrating callout is already running.
+ */
+struct cc_mig_ent {
+#ifdef SMP
+	void	(*ce_migration_func)(void *);
+	void	*ce_migration_arg;
+	int	ce_migration_cpu;
+	int	ce_migration_ticks;
+#endif
+};
+
+/*
  * There is one struct callout_cpu per cpu, holding all relevant
  * state for the callout processing thread on the individual CPU.
  * In particular:
@@ -105,6 +124,7 @@ int callwheelsize, callwheelbits, callwheelmask;
  * when the callout should be served.
  */
 struct callout_cpu {
+	struct cc_mig_ent cc_migrating_entity;
 	struct mtx cc_lock;
 	struct callout *cc_callout;
 	struct callout_tailq *cc_callwheel;
@@ -119,7 +139,13 @@ struct callout_cpu {
 };
 
 #ifdef SMP
+#define	cc_migration_func	cc_migrating_entity.ce_migration_func
+#define	cc_migration_arg	cc_migrating_entity.ce_migration_arg
+#define	cc_migration_cpu	cc_migrating_entity.ce_migration_cpu
+#define	cc_migration_ticks	cc_migrating_entity.ce_migration_ticks
+
 struct callout_cpu cc_cpu[MAXCPU];
+#define	CPUBLOCK	MAXCPU
 #define	CC_CPU(cpu)	(&cc_cpu[(cpu)])
 #define	CC_SELF()	CC_CPU(PCPU_GET(cpuid))
 #else
@@ -129,6 +155,7 @@ struct callout_cpu cc_cpu;
 #endif
 #define	CC_LOCK(cc)	mtx_lock_spin(&(cc)->cc_lock)
 #define	CC_UNLOCK(cc)	mtx_unlock_spin(&(cc)->cc_lock)
+#define	CC_LOCK_ASSERT(cc)	mtx_assert(&(cc)->cc_lock, MA_OWNED)
 
 static int timeout_cpu;
 
@@ -152,6 +179,35 @@ MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
  */
 
 /*
+ * Resets the migration entity tied to a specific callout cpu.
+ */
+static void
+cc_cme_cleanup(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+	cc->cc_migration_cpu = CPUBLOCK;
+	cc->cc_migration_ticks = 0;
+	cc->cc_migration_func = NULL;
+	cc->cc_migration_arg = NULL;
+#endif
+}
+
+/*
+ * Checks if migration is requested by a specific callout cpu.
+ */
+static int
+cc_cme_migrating(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+	return (cc->cc_migration_cpu != CPUBLOCK);
+#else
+	return (0);
+#endif
+}
+
+/*
  * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
  *
  * This code is called very early in the kernel initialization sequence,
@@ -237,6 +293,7 @@ callout_cpu_init(struct callout_cpu *cc)
 	for (i = 0; i < callwheelsize; i++) {
 		TAILQ_INIT(&cc->cc_callwheel[i]);
 	}
+	cc_cme_cleanup(cc);
 	if (cc->cc_callout == NULL)
 		return;
 	for (i = 0; i < ncallout; i++) {
@@ -247,6 +304,36 @@ callout_cpu_init(struct callout_cpu *cc)
 	}
 }
 
+#ifdef SMP
+/*
+ * Switches the cpu tied to a specific callout.
+ * The function expects a locked incoming callout cpu and returns with
+ * locked outgoing callout cpu.
+ */
+static struct callout_cpu *
+callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
+{
+	struct callout_cpu *new_cc;
+
+	MPASS(c != NULL && cc != NULL);
+	CC_LOCK_ASSERT(cc);
+
+	/*
+	 * Avoid interrupts and preemption firing after the callout cpu
+	 * is blocked in order to avoid deadlocks as the new thread
+	 * may be willing to acquire the callout cpu lock.
+	 */
+	c->c_cpu = CPUBLOCK;
+	spinlock_enter();
+	CC_UNLOCK(cc);
+	new_cc = CC_CPU(new_cpu);
+	CC_LOCK(new_cc);
+	spinlock_exit();
+	c->c_cpu = new_cpu;
+	return (new_cc);
+}
+#endif
+
 #ifndef __rtems__
 /*
  * kern_timeout_callwheel_init() - initialize previously reserved callwheel
@@ -281,11 +368,9 @@ start_softclock(void *dummy)
 		panic("died while creating standard software ithreads");
 	cc->cc_cookie = softclock_ih;
 #ifdef SMP
-	for (cpu = 0; cpu <= mp_maxid; cpu++) {
+	CPU_FOREACH(cpu) {
 		if (cpu == timeout_cpu)
 			continue;
-		if (CPU_ABSENT(cpu))
-			continue;
 		cc = CC_CPU(cpu);
 		if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
 		    INTR_MPSAFE, &cc->cc_cookie))
@@ -340,6 +425,13 @@ callout_lock(struct callout *c)
 
 	for (;;) {
 		cpu = c->c_cpu;
+#ifdef SMP
+		if (cpu == CPUBLOCK) {
+			while (c->c_cpu == CPUBLOCK)
+				cpu_spinwait();
+			continue;
+		}
+#endif
 		cc = CC_CPU(cpu);
 		CC_LOCK(cc);
 		if (cpu == c->c_cpu)
@@ -349,6 +441,202 @@ callout_lock(struct callout *c)
 	return (cc);
 }
 
+static void
+callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
+    void (*func)(void *), void *arg, int cpu)
+{
+
+	CC_LOCK_ASSERT(cc);
+
+	if (to_ticks <= 0)
+		to_ticks = 1;
+	c->c_arg = arg;
+	c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+	c->c_func = func;
+	c->c_time = cc->cc_ticks + to_ticks;
+	TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+	    c, c_links.tqe);
+}
+
+static void
+callout_cc_del(struct callout *c, struct callout_cpu *cc)
+{
+
+	if (cc->cc_next == c)
+		cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+	if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
+		c->c_func = NULL;
+		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+	}
+}
+
+static struct callout *
+softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
+    int *lockcalls, int *gcalls)
+{
+	void (*c_func)(void *);
+	void *c_arg;
+	struct lock_class *class;
+	struct lock_object *c_lock;
+	int c_flags, sharedlock;
+#ifdef SMP
+	struct callout_cpu *new_cc;
+	void (*new_func)(void *);
+	void *new_arg;
+	int new_cpu, new_ticks;
+#endif
+#ifdef DIAGNOSTIC
+	struct bintime bt1, bt2;
+	struct timespec ts2;
+	static uint64_t maxdt = 36893488147419102LL;	/* 2 msec */
+	static timeout_t *lastfunc;
+#endif
+
+	cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+	class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
+	sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+	c_lock = c->c_lock;
+	c_func = c->c_func;
+	c_arg = c->c_arg;
+	c_flags = c->c_flags;
+	if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+		c->c_flags = CALLOUT_LOCAL_ALLOC;
+	else
+		c->c_flags &= ~CALLOUT_PENDING;
+	cc->cc_curr = c;
+	cc->cc_cancel = 0;
+	CC_UNLOCK(cc);
+	if (c_lock != NULL) {
+		class->lc_lock(c_lock, sharedlock);
+		/*
+		 * The callout may have been cancelled
+		 * while we switched locks.
+		 */
+		if (cc->cc_cancel) {
+			class->lc_unlock(c_lock);
+			goto skip;
+		}
+		/* The callout cannot be stopped now. */
+		cc->cc_cancel = 1;
+
+		if (c_lock == &Giant.lock_object) {
+			(*gcalls)++;
+			CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+			    c, c_func, c_arg);
+		} else {
+			(*lockcalls)++;
+			CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
+			    c, c_func, c_arg);
+		}
+	} else {
+		(*mpcalls)++;
+		CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+		    c, c_func, c_arg);
+	}
+#ifdef DIAGNOSTIC
+	binuptime(&bt1);
+#endif
+#ifndef __rtems__
+	THREAD_NO_SLEEPING();
+	SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
+#endif /* __rtems__ */
+	c_func(c_arg);
+#ifndef __rtems__
+	SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
+	THREAD_SLEEPING_OK();
+#endif /* __rtems__ */
+#ifdef DIAGNOSTIC
+	binuptime(&bt2);
+	bintime_sub(&bt2, &bt1);
+	if (bt2.frac > maxdt) {
+		if (lastfunc != c_func || bt2.frac > maxdt * 2) {
+			bintime2timespec(&bt2, &ts2);
+			printf(
+		"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
+			    c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
+		}
+		maxdt = bt2.frac;
+		lastfunc = c_func;
+	}
+#endif
+	CTR1(KTR_CALLOUT, "callout %p finished", c);
+	if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+		class->lc_unlock(c_lock);
+skip:
+	CC_LOCK(cc);
+	/*
+	 * If the current callout is locally allocated (from
+	 * timeout(9)) then put it on the freelist.
+	 *
+	 * Note: we need to check the cached copy of c_flags because
+	 * if it was not local, then it's not safe to deref the
	 * callout pointer.
+	 */
+	if (c_flags & CALLOUT_LOCAL_ALLOC) {
+		KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
+		    ("corrupted callout"));
+		c->c_func = NULL;
+		SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+	}
+	cc->cc_curr = NULL;
+	if (cc->cc_waiting) {
+		/*
+		 * There is someone waiting for the
+		 * callout to complete.
+		 * If the callout was scheduled for
+		 * migration just cancel it.
+		 */
+		if (cc_cme_migrating(cc))
+			cc_cme_cleanup(cc);
+		cc->cc_waiting = 0;
+		CC_UNLOCK(cc);
+		wakeup(&cc->cc_waiting);
+		CC_LOCK(cc);
+	} else if (cc_cme_migrating(cc)) {
+#ifdef SMP
+		/*
+		 * If the callout was scheduled for
+		 * migration just perform it now.
+		 */
+		new_cpu = cc->cc_migration_cpu;
+		new_ticks = cc->cc_migration_ticks;
+		new_func = cc->cc_migration_func;
+		new_arg = cc->cc_migration_arg;
+		cc_cme_cleanup(cc);
+
+		/*
+		 * Handle deferred callout stops
+		 */
+		if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
+			CTR3(KTR_CALLOUT,
+			    "deferred cancelled %p func %p arg %p",
+			    c, new_func, new_arg);
+			callout_cc_del(c, cc);
+			goto nextc;
+		}
+
+		c->c_flags &= ~CALLOUT_DFRMIGRATION;
+
+		/*
+		 * It should be asserted here that the
+		 * callout is not destroyed but that
+		 * is not easy.
+		 */
+		new_cc = callout_cpu_switch(c, cc, new_cpu);
+		callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
+		    new_cpu);
+		CC_UNLOCK(new_cc);
+		CC_LOCK(cc);
+#else
+		panic("migration should not happen");
+#endif
+	}
+#ifdef SMP
+nextc:
+#endif
	return (cc->cc_next);
+}
+
 /*
  * The callout mechanism is based on the work of Adam M.
Costello and * George Varghese, published in a technical report entitled "Redesigning @@ -377,12 +665,6 @@ softclock(void *arg) int mpcalls; int lockcalls; int gcalls; -#ifdef DIAGNOSTIC - struct bintime bt1, bt2; - struct timespec ts2; - static uint64_t maxdt = 36893488147419102LL; /* 2 msec */ - static timeout_t *lastfunc; -#endif #ifndef MAX_SOFTCLOCK_STEPS #define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ @@ -404,7 +686,7 @@ softclock(void *arg) cc->cc_softticks++; bucket = &cc->cc_callwheel[curticks & callwheelmask]; c = TAILQ_FIRST(bucket); - while (c) { + while (c != NULL) { depth++; if (c->c_time != curticks) { c = TAILQ_NEXT(c, c_links.tqe); @@ -419,128 +701,10 @@ softclock(void *arg) steps = 0; } } else { - void (*c_func)(void *); - void *c_arg; - struct lock_class *class; - struct lock_object *c_lock; - int c_flags, sharedlock; - - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE(bucket, c, c_links.tqe); - class = (c->c_lock != NULL) ? - LOCK_CLASS(c->c_lock) : NULL; - sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? - 0 : 1; - c_lock = c->c_lock; - c_func = c->c_func; - c_arg = c->c_arg; - c_flags = c->c_flags; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) { - c->c_flags = CALLOUT_LOCAL_ALLOC; - } else { - c->c_flags = - (c->c_flags & ~CALLOUT_PENDING); - } - cc->cc_curr = c; - cc->cc_cancel = 0; - CC_UNLOCK(cc); - if (c_lock != NULL) { - class->lc_lock(c_lock, sharedlock); - /* - * The callout may have been cancelled - * while we switched locks. - */ - if (cc->cc_cancel) { - class->lc_unlock(c_lock); - goto skip; - } - /* The callout cannot be stopped now. */ - cc->cc_cancel = 1; - - if (c_lock == &Giant.lock_object) { - gcalls++; - CTR3(KTR_CALLOUT, - "callout %p func %p arg %p", - c, c_func, c_arg); - } else { - lockcalls++; - CTR3(KTR_CALLOUT, "callout lock" - " %p func %p arg %p", - c, c_func, c_arg); - } - } else { - mpcalls++; - CTR3(KTR_CALLOUT, - "callout mpsafe %p func %p arg %p", - c, c_func, c_arg); - } -#ifdef DIAGNOSTIC - binuptime(&bt1); -#endif -#ifndef __rtems__ - THREAD_NO_SLEEPING(); - SDT_PROBE(callout_execute, kernel, , - callout_start, c, 0, 0, 0, 0); -#endif /* __rtems__ */ - c_func(c_arg); -#ifndef __rtems__ - SDT_PROBE(callout_execute, kernel, , - callout_end, c, 0, 0, 0, 0); - THREAD_SLEEPING_OK(); -#endif /* __rtems__ */ -#ifdef DIAGNOSTIC - binuptime(&bt2); - bintime_sub(&bt2, &bt1); - if (bt2.frac > maxdt) { - if (lastfunc != c_func || - bt2.frac > maxdt * 2) { - bintime2timespec(&bt2, &ts2); - printf( - "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", - c_func, c_arg, - (intmax_t)ts2.tv_sec, - ts2.tv_nsec); - } - maxdt = bt2.frac; - lastfunc = c_func; - } -#endif - CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) - class->lc_unlock(c_lock); - skip: - CC_LOCK(cc); - /* - * If the current callout is locally - * allocated (from timeout(9)) - * then put it on the freelist. - * - * Note: we need to check the cached - * copy of c_flags because if it was not - * local, then it's not safe to deref the - * callout pointer. - */ - if (c_flags & CALLOUT_LOCAL_ALLOC) { - KASSERT(c->c_flags == - CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - c->c_func = NULL; - SLIST_INSERT_HEAD(&cc->cc_callfree, c, - c_links.sle); - } - cc->cc_curr = NULL; - if (cc->cc_waiting) { - /* - * There is someone waiting - * for the callout to complete. 
- */ - cc->cc_waiting = 0; - CC_UNLOCK(cc); - wakeup(&cc->cc_waiting); - CC_LOCK(cc); - } + c = softclock_call_cc(c, cc, &mpcalls, + &lockcalls, &gcalls); steps = 0; - c = cc->cc_next; } } } @@ -650,7 +814,6 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), */ if (c->c_flags & CALLOUT_LOCAL_ALLOC) cpu = c->c_cpu; -retry: cc = callout_lock(c); if (cc->cc_curr == c) { /* @@ -682,25 +845,31 @@ retry: cancelled = 1; c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); } + +#ifdef SMP /* - * If the lock must migrate we have to check the state again as - * we can't hold both the new and old locks simultaneously. + * If the callout must migrate try to perform it immediately. + * If the callout is currently running, just defer the migration + * to a more appropriate moment. */ if (c->c_cpu != cpu) { - c->c_cpu = cpu; - CC_UNLOCK(cc); - goto retry; + if (cc->cc_curr == c) { + cc->cc_migration_cpu = cpu; + cc->cc_migration_ticks = to_ticks; + cc->cc_migration_func = ftn; + cc->cc_migration_arg = arg; + c->c_flags |= CALLOUT_DFRMIGRATION; + CTR5(KTR_CALLOUT, + "migration of %p func %p arg %p in %d to %u deferred", + c, c->c_func, c->c_arg, to_ticks, cpu); + CC_UNLOCK(cc); + return (cancelled); + } + cc = callout_cpu_switch(c, cc, cpu); } +#endif - if (to_ticks <= 0) - to_ticks = 1; - - c->c_arg = arg; - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); - c->c_func = ftn; - c->c_time = cc->cc_ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], - c, c_links.tqe); + callout_cc_add(c, cc, to_ticks, ftn, arg, cpu); CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d", cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks); CC_UNLOCK(cc); @@ -728,13 +897,13 @@ _callout_stop_safe(c, safe) struct callout *c; int safe; { - struct callout_cpu *cc; + struct callout_cpu *cc, *old_cc; struct lock_class *class; #ifndef __rtems__ int use_lock, sq_locked; #else /* __rtems__ */ int use_lock; -#endif /* __rtems__ */ +#endif /* __rtems__ */ /* * Some old subsystems don't hold Giant while running a callout_stop(), @@ -753,9 +922,30 @@ _callout_stop_safe(c, safe) #ifndef __rtems__ sq_locked = 0; + old_cc = NULL; again: -#endif /* __rtems__ */ +#endif /* __rtems__ */ cc = callout_lock(c); + +#ifndef __rtems__ + /* + * If the callout was migrating while the callout cpu lock was + * dropped, just drop the sleepqueue lock and check the states + * again. + */ + if (sq_locked != 0 && cc != old_cc) { +#ifdef SMP + CC_UNLOCK(cc); + sleepq_release(&old_cc->cc_waiting); + sq_locked = 0; + old_cc = NULL; + goto again; +#else + panic("migration should not happen"); +#endif + } +#endif /* __rtems__ */ + /* * If the callout isn't pending, it's not on the queue, so * don't attempt to remove it from the queue. We can try to @@ -775,7 +965,7 @@ again: #ifndef __rtems__ if (sq_locked) sleepq_release(&cc->cc_waiting); -#endif /* __rtems__ */ +#endif /* __rtems__ */ return (0); } @@ -810,8 +1000,16 @@ again: CC_UNLOCK(cc); sleepq_lock(&cc->cc_waiting); sq_locked = 1; + old_cc = cc; goto again; } + + /* + * Migration could be cancelled here, but + * as long as it is still not sure when it + * will be packed up, just let softclock() + * take care of it. + */ cc->cc_waiting = 1; DROP_GIANT(); CC_UNLOCK(cc); @@ -820,6 +1018,7 @@ again: SLEEPQ_SLEEP, 0); sleepq_wait(&cc->cc_waiting, 0); sq_locked = 0; + old_cc = NULL; /* Reacquire locks previously released. 
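 *
 * (Editorial sketch, not part of the change: the teardown pattern all
 * of this machinery serves is simply
 *
 *	callout_init_mtx(&sc->co, &sc->mtx, 0);
 *	callout_reset_on(&sc->co, hz, foo_tick, sc, 1);
 *	...
 *	callout_drain(&sc->co);
 *
 * where callout_drain() maps to _callout_stop_safe(c, 1) and may sleep
 * here until a concurrent handler finishes; sc and foo_tick are
 * illustrative names, and any deferred migration is resolved by
 * softclock_call_cc() above before the drain returns.)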
*/ PICKUP_GIANT(); @@ -847,9 +1046,17 @@ again: cc->cc_cancel = 1; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); + KASSERT(!cc_cme_migrating(cc), + ("callout wrongly scheduled for migration")); CC_UNLOCK(cc); KASSERT(!sq_locked, ("sleepqueue chain locked")); return (1); + } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { + c->c_flags &= ~CALLOUT_DFRMIGRATION; + CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + return (1); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); @@ -860,23 +1067,16 @@ again: #ifndef __rtems__ if (sq_locked) sleepq_release(&cc->cc_waiting); -#endif /* __rtems__ */ +#endif /* __rtems__ */ c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); - if (cc->cc_next == c) { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - } - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); - CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); + TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, + c_links.tqe); + callout_cc_del(c, cc); - if (c->c_flags & CALLOUT_LOCAL_ALLOC) { - c->c_func = NULL; - SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); - } CC_UNLOCK(cc); return (1); } diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index 51717b49..78a803e0 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -430,8 +430,8 @@ static void devinit(void) { #ifndef __rtems__ - devctl_dev = make_dev(&dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600, - "devctl"); + devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL, + UID_ROOT, GID_WHEEL, 0600, "devctl"); mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF); cv_init(&devsoftc.cv, "dev cv"); TAILQ_INIT(&devsoftc.devq); @@ -1058,10 +1058,12 @@ devclass_find(const char *classname) * is called by devclass_add_driver to accomplish the recursive * notification of all the children classes of dc, as well as dc. * Each layer will have BUS_DRIVER_ADDED() called for all instances of - * the devclass. We do a full search here of the devclass list at - * each iteration level to save storing children-lists in the devclass - * structure. If we ever move beyond a few dozen devices doing this, - * we may need to reevaluate... + * the devclass. + * + * We do a full search here of the devclass list at each iteration + * level to save storing children-lists in the devclass structure. If + * we ever move beyond a few dozen devices doing this, we may need to + * reevaluate... * * @param dc the devclass to edit * @param driver the driver that was just added @@ -1156,6 +1158,77 @@ devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp) } /** + * @brief Register that a device driver has been deleted from a devclass + * + * Register that a device driver has been removed from a devclass. + * This is called by devclass_delete_driver to accomplish the + * recursive notification of all the children classes of busclass, as + * well as busclass. Each layer will attempt to detach the driver + * from any devices that are children of the bus's devclass. The function + * will return an error if a device fails to detach. + * + * We do a full search here of the devclass list at each iteration + * level to save storing children-lists in the devclass structure. If + * we ever move beyond a few dozen devices doing this, we may need to + * reevaluate... 
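 *
 * (Editorial aside: the usual entry point into this path is driver
 * module unload. A hand-rolled equivalent of what DRIVER_MODULE()
 * arranges would be, with foobus and foo_driver as illustrative names:
 *
 *	case MOD_UNLOAD:
 *		error = devclass_delete_driver(devclass_find("foobus"),
 *		    &foo_driver);
 *		break;
 *
 * which lands here to detach every child bound to the driver.)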
+ * + * @param busclass the devclass of the parent bus + * @param dc the devclass of the driver being deleted + * @param driver the driver being deleted + */ +static int +devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver) +{ + devclass_t parent; + device_t dev; + int error, i; + + /* + * Disassociate from any devices. We iterate through all the + * devices in the devclass of the driver and detach any which are + * using the driver and which have a parent in the devclass which + * we are deleting from. + * + * Note that since a driver can be in multiple devclasses, we + * should not detach devices which are not children of devices in + * the affected devclass. + */ + for (i = 0; i < dc->maxunit; i++) { + if (dc->devices[i]) { + dev = dc->devices[i]; + if (dev->driver == driver && dev->parent && + dev->parent->devclass == busclass) { + if ((error = device_detach(dev)) != 0) + return (error); + BUS_PROBE_NOMATCH(dev->parent, dev); + devnomatch(dev); + dev->flags |= DF_DONENOMATCH; + } + } + } + + /* + * Walk through the children classes. Since we only keep a + * single parent pointer around, we walk the entire list of + * devclasses looking for children. We set the + * DC_HAS_CHILDREN flag when a child devclass is created on + * the parent, so we only walk the list for those devclasses + * that have children. + */ + if (!(busclass->flags & DC_HAS_CHILDREN)) + return (0); + parent = busclass; + TAILQ_FOREACH(busclass, &devclasses, link) { + if (busclass->parent == parent) { + error = devclass_driver_deleted(busclass, dc, driver); + if (error) + return (error); + } + } + return (0); +} + +/** * @brief Delete a device driver from a device class * * Delete a device driver from a devclass. This is normally called @@ -1174,8 +1247,6 @@ devclass_delete_driver(devclass_t busclass, driver_t *driver) { devclass_t dc = devclass_find(driver->name); driverlink_t dl; - device_t dev; - int i; int error; PDEBUG(("%s from devclass %s", driver->name, DEVCLANAME(busclass))); @@ -1197,27 +1268,9 @@ devclass_delete_driver(devclass_t busclass, driver_t *driver) return (ENOENT); } - /* - * Disassociate from any devices. We iterate through all the - * devices in the devclass of the driver and detach any which are - * using the driver and which have a parent in the devclass which - * we are deleting from. - * - * Note that since a driver can be in multiple devclasses, we - * should not detach devices which are not children of devices in - * the affected devclass. - */ - for (i = 0; i < dc->maxunit; i++) { - if (dc->devices[i]) { - dev = dc->devices[i]; - if (dev->driver == driver && dev->parent && - dev->parent->devclass == busclass) { - if ((error = device_detach(dev)) != 0) - return (error); - device_set_driver(dev, NULL); - } - } - } + error = devclass_driver_deleted(busclass, dc, driver); + if (error != 0) + return (error); TAILQ_REMOVE(&busclass->drivers, dl, link); free(dl, M_BUS); @@ -1889,7 +1942,7 @@ device_delete_child(device_t dev, device_t child) PDEBUG(("%s from %s", DEVICENAME(child), DEVICENAME(dev))); /* remove children first */ - while ( (grandchild = TAILQ_FIRST(&child->children)) ) { + while ((grandchild = TAILQ_FIRST(&child->children)) != NULL) { error = device_delete_child(child, grandchild); if (error) return (error); @@ -1908,6 +1961,39 @@ device_delete_child(device_t dev, device_t child) } /** + * @brief Delete all children devices of the given device, if any. 
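 *
 * (Editorial sketch, not from this commit: the typical caller is a bus
 * driver's detach method, which must dispose of its children before
 * releasing its own resources. The foobus names are illustrative:
 *
 *	static int
 *	foobus_detach(device_t dev)
 *	{
 *		int error;
 *
 *		error = device_delete_children(dev);
 *		if (error != 0)
 *			return (error);
 *		return (foobus_release_resources(dev));
 *	}
 * )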
+ * + * This function deletes all children devices of the given device, if + * any, using the device_delete_child() function for each device it + * finds. If a child device cannot be deleted, this function will + * return an error code. + * + * @param dev the parent device + * + * @retval 0 success + * @retval non-zero a device would not detach + */ +int +device_delete_children(device_t dev) +{ + device_t child; + int error; + + PDEBUG(("Deleting all children of %s", DEVICENAME(dev))); + + error = 0; + + while ((child = TAILQ_FIRST(&dev->children)) != NULL) { + error = device_delete_child(dev, child); + if (error) { + PDEBUG(("Failed deleting %s", DEVICENAME(child))); + break; + } + } + return (error); +} + +/** * @brief Find a device given a unit number * * This is similar to devclass_get_devices() but only searches for @@ -2001,19 +2087,23 @@ device_probe_child(device_t dev, device_t child) for (dl = first_matching_driver(dc, child); dl; dl = next_matching_driver(dc, child, dl)) { - /* If this driver's pass is too high, then ignore it. */ if (dl->pass > bus_current_pass) continue; PDEBUG(("Trying %s", DRIVERNAME(dl->driver))); - device_set_driver(child, dl->driver); + result = device_set_driver(child, dl->driver); + if (result == ENOMEM) + return (result); + else if (result != 0) + continue; if (!hasclass) { - if (device_set_devclass(child, dl->driver->name)) { - printf("driver bug: Unable to set devclass (devname: %s)\n", - (child ? device_get_name(child) : - "no device")); - device_set_driver(child, NULL); + if (device_set_devclass(child, + dl->driver->name) != 0) { + printf("driver bug: Unable to set " + "devclass (devname: %s)\n", + device_get_name(child)); + (void)device_set_driver(child, NULL); continue; } } @@ -2029,7 +2119,7 @@ device_probe_child(device_t dev, device_t child) /* Reset flags and devclass before the next probe. */ child->devflags = 0; if (!hasclass) - device_set_devclass(child, NULL); + (void)device_set_devclass(child, NULL); /* * If the driver returns SUCCESS, there can be @@ -2046,7 +2136,7 @@ device_probe_child(device_t dev, device_t child) * certainly doesn't match. */ if (result > 0) { - device_set_driver(child, NULL); + (void)device_set_driver(child, NULL); continue; } @@ -2083,7 +2173,7 @@ device_probe_child(device_t dev, device_t child) /* XXX What happens if we rebid and got no best? */ if (best) { /* - * If this device was atached, and we were asked to + * If this device was attached, and we were asked to * rescan, and it is a different driver, then we have * to detach the old driver and reattach this new one. 
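 *
 * (Editorial aside: the "best" selection below ranks the values
 * returned by probe methods, where values closer to zero win and
 * positive values are errnos meaning "no match". A typical probe,
 * with FOO_DEVID as an illustrative constant:
 *
 *	static int
 *	foo_probe(device_t dev)
 *	{
 *		if (pci_get_devid(dev) != FOO_DEVID)
 *			return (ENXIO);
 *		device_set_desc(dev, "Foo controller");
 *		return (BUS_PROBE_DEFAULT);
 *	}
 *
 * A more specific driver returns BUS_PROBE_SPECIFIC (0) or another
 * value closer to zero and wins the bid.)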
* Note, we don't have to check for DF_REBID here @@ -2109,7 +2199,9 @@ device_probe_child(device_t dev, device_t child) if (result != 0) return (result); } - device_set_driver(child, best->driver); + result = device_set_driver(child, best->driver); + if (result != 0) + return (result); #ifndef __rtems__ resource_int_value(best->driver->name, child->unit, "flags", &child->devflags); @@ -2171,6 +2263,11 @@ device_get_children(device_t dev, device_t **devlistp, int *devcountp) TAILQ_FOREACH(child, &dev->children, link) { count++; } + if (count == 0) { + *devlistp = NULL; + *devcountp = 0; + return (0); + } #ifdef __rtems__ /* malloc(0) may return NULL */ @@ -2471,12 +2568,13 @@ device_disable(device_t dev) void device_busy(device_t dev) { - if (dev->state < DS_ATTACHED) + if (dev->state < DS_ATTACHING) panic("device_busy: called for unattached device"); if (dev->busy == 0 && dev->parent) device_busy(dev->parent); dev->busy++; - dev->state = DS_BUSY; + if (dev->state == DS_ATTACHED) + dev->state = DS_BUSY; } /** @@ -2485,14 +2583,16 @@ device_busy(device_t dev) void device_unbusy(device_t dev) { - if (dev->state != DS_BUSY) + if (dev->busy != 0 && dev->state != DS_BUSY && + dev->state != DS_ATTACHING) panic("device_unbusy: called for non-busy device %s", device_get_nameunit(dev)); dev->busy--; if (dev->busy == 0) { if (dev->parent) device_unbusy(dev->parent); - dev->state = DS_ATTACHED; + if (dev->state == DS_BUSY) + dev->state = DS_ATTACHED; } } @@ -2602,6 +2702,7 @@ device_set_driver(device_t dev, driver_t *driver) free(dev->softc, M_BUS_SC); dev->softc = NULL; } + device_set_desc(dev, NULL); kobj_delete((kobj_t) dev, NULL); dev->driver = driver; if (driver) { @@ -2724,22 +2825,36 @@ device_attach(device_t dev) { int error; +#ifndef __rtems__ + if (resource_disabled(dev->driver->name, dev->unit)) { + device_disable(dev); + if (bootverbose) + device_printf(dev, "disabled via hints entry\n"); + return (ENXIO); + } +#endif /* __rtems__ */ + device_sysctl_init(dev); if (!device_is_quiet(dev)) device_print_child(dev->parent, dev); + dev->state = DS_ATTACHING; if ((error = DEVICE_ATTACH(dev)) != 0) { printf("device_attach: %s%d attach returned %d\n", dev->driver->name, dev->unit, error); - /* Unset the class; set in device_probe_child */ - if (dev->devclass == NULL) - device_set_devclass(dev, NULL); - device_set_driver(dev, NULL); + if (!(dev->flags & DF_FIXEDCLASS)) + devclass_delete_device(dev->devclass, dev); + (void)device_set_driver(dev, NULL); device_sysctl_fini(dev); + KASSERT(dev->busy == 0, ("attach failed but busy")); dev->state = DS_NOTPRESENT; return (error); } device_sysctl_update(dev); - dev->state = DS_ATTACHED; + if (dev->busy) + dev->state = DS_BUSY; + else + dev->state = DS_ATTACHED; + dev->flags &= ~DF_DONENOMATCH; devadded(dev); return (0); } @@ -2785,8 +2900,7 @@ device_detach(device_t dev) devclass_delete_device(dev->devclass, dev); dev->state = DS_NOTPRESENT; - device_set_driver(dev, NULL); - device_set_desc(dev, NULL); + (void)device_set_driver(dev, NULL); device_sysctl_fini(dev); return (0); @@ -3517,6 +3631,23 @@ bus_generic_teardown_intr(device_t dev, device_t child, struct resource *irq, } /** + * @brief Helper function for implementing BUS_ADJUST_RESOURCE(). + * + * This simple implementation of BUS_ADJUST_RESOURCE() simply calls the + * BUS_ADJUST_RESOURCE() method of the parent of @p dev. 
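 *
 * (Editorial sketch: a leaf driver does not call this directly but
 * goes through the new bus_adjust_resource() wrapper further down,
 * e.g. to grow a decoded memory window in place; values illustrative:
 *
 *	error = bus_adjust_resource(dev, SYS_RES_MEMORY, res,
 *	    rman_get_start(res), rman_get_end(res) + 0x1000);
 *
 * Each bus layer forwards the request upward until a bus that owns the
 * window, typically via rman_adjust_resource() in subr_rman.c below,
 * satisfies or rejects it.)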
+ */ +int +bus_generic_adjust_resource(device_t dev, device_t child, int type, + struct resource *r, u_long start, u_long end) +{ + /* Propagate up the bus hierarchy until someone handles it. */ + if (dev->parent) + return (BUS_ADJUST_RESOURCE(dev->parent, child, type, r, start, + end)); + return (EINVAL); +} + +/** * @brief Helper function for implementing BUS_ALLOC_RESOURCE(). * * This simple implementation of BUS_ALLOC_RESOURCE() simply calls the @@ -3839,6 +3970,21 @@ bus_alloc_resource(device_t dev, int type, int *rid, u_long start, u_long end, } /** + * @brief Wrapper function for BUS_ADJUST_RESOURCE(). + * + * This function simply calls the BUS_ADJUST_RESOURCE() method of the + * parent of @p dev. + */ +int +bus_adjust_resource(device_t dev, int type, struct resource *r, u_long start, + u_long end) +{ + if (dev->parent == NULL) + return (EINVAL); + return (BUS_ADJUST_RESOURCE(dev->parent, dev, type, r, start, end)); +} + +/** * @brief Wrapper function for BUS_ACTIVATE_RESOURCE(). * * This function simply calls the BUS_ACTIVATE_RESOURCE() method of the @@ -4417,7 +4563,6 @@ print_driver(driver_t *driver, int indent) print_driver_short(driver, indent); } - static void print_driver_list(driver_list_t drivers, int indent) { diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c index e12a1f36..8d182173 100644 --- a/freebsd/sys/kern/subr_kobj.c +++ b/freebsd/sys/kern/subr_kobj.c @@ -62,18 +62,9 @@ static struct mtx kobj_mtx; static int kobj_mutex_inited; static int kobj_next_id = 1; -/* - * In the event that kobj_mtx has not been initialized yet, - * we will ignore it, and run without locks in order to support - * use of KOBJ before mutexes are available. This early in the boot - * process, everything is single threaded and so races should not - * happen. This is used to provide the PMAP layer on PowerPC, as well - * as board support. - */ - -#define KOBJ_LOCK() if (kobj_mutex_inited) mtx_lock(&kobj_mtx); -#define KOBJ_UNLOCK() if (kobj_mutex_inited) mtx_unlock(&kobj_mtx); -#define KOBJ_ASSERT(what) if (kobj_mutex_inited) mtx_assert(&kobj_mtx,what); +#define KOBJ_LOCK() mtx_lock(&kobj_mtx) +#define KOBJ_UNLOCK() mtx_unlock(&kobj_mtx) +#define KOBJ_ASSERT(what) mtx_assert(&kobj_mtx, what); SYSCTL_UINT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD, &kobj_next_id, 0, ""); @@ -106,28 +97,11 @@ kobj_error_method(void) } static void -kobj_register_method(struct kobjop_desc *desc) -{ - KOBJ_ASSERT(MA_OWNED); - - if (desc->id == 0) { - desc->id = kobj_next_id++; - } -} - -static void -kobj_unregister_method(struct kobjop_desc *desc) -{ -} - -static void kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops) { kobj_method_t *m; int i; - KOBJ_ASSERT(MA_OWNED); - /* * Don't do anything if we are already compiled. */ @@ -137,8 +111,10 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops) /* * First register any methods which need it. */ - for (i = 0, m = cls->methods; m->desc; i++, m++) - kobj_register_method(m->desc); + for (i = 0, m = cls->methods; m->desc; i++, m++) { + if (m->desc->id == 0) + m->desc->id = kobj_next_id++; + } /* * Then initialise the ops table. 
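The static variants added in this file exist for consumers that must compile classes and initialise objects before mutexes work, very early in boot. A minimal sketch under that assumption (the foo names are illustrative, not from the patch):

	static struct kobj_ops foo_ops;	/* static storage, no malloc */

	kobj_class_compile_static(&foo_class, &foo_ops);
	kobj_init_static((kobj_t)&foo_obj, &foo_class);

Both assert that kobj_mutex_inited is still zero, so they may only run during the early cycles the KASSERTs below describe.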
@@ -161,7 +137,7 @@ kobj_class_compile(kobj_class_t cls) */ ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT); if (!ops) - panic("kobj_compile_methods: out of memory"); + panic("%s: out of memory", __func__); KOBJ_LOCK(); @@ -184,17 +160,14 @@ void kobj_class_compile_static(kobj_class_t cls, kobj_ops_t ops) { - KOBJ_ASSERT(MA_NOTOWNED); + KASSERT(kobj_mutex_inited == 0, + ("%s: only supported during early cycles", __func__)); /* * Increment refs to make sure that the ops table is not freed. */ - KOBJ_LOCK(); - cls->refs++; kobj_class_compile_common(cls, ops); - - KOBJ_UNLOCK(); } static kobj_method_t* @@ -261,8 +234,6 @@ kobj_lookup_method(kobj_class_t cls, void kobj_class_free(kobj_class_t cls) { - int i; - kobj_method_t *m; void* ops = NULL; KOBJ_ASSERT(MA_NOTOWNED); @@ -274,10 +245,9 @@ kobj_class_free(kobj_class_t cls) */ if (cls->refs == 0) { /* - * Unregister any methods which are no longer used. + * For now we don't do anything to unregister any methods + * which are no longer used. */ - for (i = 0, m = cls->methods; m->desc; i++, m++) - kobj_unregister_method(m->desc); /* * Free memory and clean up. @@ -310,6 +280,14 @@ kobj_create(kobj_class_t cls, return obj; } +static void +kobj_init_common(kobj_t obj, kobj_class_t cls) +{ + + obj->ops = cls->ops; + cls->refs++; +} + void kobj_init(kobj_t obj, kobj_class_t cls) { @@ -331,13 +309,22 @@ kobj_init(kobj_t obj, kobj_class_t cls) goto retry; } - obj->ops = cls->ops; - cls->refs++; + kobj_init_common(obj, cls); KOBJ_UNLOCK(); } void +kobj_init_static(kobj_t obj, kobj_class_t cls) +{ + + KASSERT(kobj_mutex_inited == 0, + ("%s: only supported during early cycles", __func__)); + + kobj_init_common(obj, cls); +} + +void kobj_delete(kobj_t obj, struct malloc_type *mtype) { kobj_class_t cls = obj->ops->cls; diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c index c3c87e44..a1982daf 100644 --- a/freebsd/sys/kern/subr_rman.c +++ b/freebsd/sys/kern/subr_rman.c @@ -140,6 +140,8 @@ rman_init(struct rman *rm) mtx_init(&rman_mtx, "rman head", NULL, MTX_DEF); } + if (rm->rm_start == 0 && rm->rm_end == 0) + rm->rm_end = ~0ul; if (rm->rm_type == RMAN_UNINIT) panic("rman_init"); if (rm->rm_type == RMAN_GAUGE) @@ -164,6 +166,8 @@ rman_manage_region(struct rman *rm, u_long start, u_long end) DPRINTF(("rman_manage_region: <%s> request: start %#lx, end %#lx\n", rm->rm_descr, start, end)); + if (start < rm->rm_start || end > rm->rm_end) + return EINVAL; r = int_alloc_resource(M_NOWAIT); if (r == NULL) return ENOMEM; @@ -270,6 +274,164 @@ rman_fini(struct rman *rm) return 0; } +int +rman_first_free_region(struct rman *rm, u_long *start, u_long *end) +{ + struct resource_i *r; + + mtx_lock(rm->rm_mtx); + TAILQ_FOREACH(r, &rm->rm_list, r_link) { + if (!(r->r_flags & RF_ALLOCATED)) { + *start = r->r_start; + *end = r->r_end; + mtx_unlock(rm->rm_mtx); + return (0); + } + } + mtx_unlock(rm->rm_mtx); + return (ENOENT); +} + +int +rman_last_free_region(struct rman *rm, u_long *start, u_long *end) +{ + struct resource_i *r; + + mtx_lock(rm->rm_mtx); + TAILQ_FOREACH_REVERSE(r, &rm->rm_list, resource_head, r_link) { + if (!(r->r_flags & RF_ALLOCATED)) { + *start = r->r_start; + *end = r->r_end; + mtx_unlock(rm->rm_mtx); + return (0); + } + } + mtx_unlock(rm->rm_mtx); + return (ENOENT); +} + +/* Shrink or extend one or both ends of an allocated resource. */ +int +rman_adjust_resource(struct resource *rr, u_long start, u_long end) +{ + struct resource_i *r, *s, *t, *new; + struct rman *rm; + + /* Not supported for shared resources. 
*/ + r = rr->__r_i; + if (r->r_flags & (RF_TIMESHARE | RF_SHAREABLE)) + return (EINVAL); + + /* + * This does not support wholesale moving of a resource. At + * least part of the desired new range must overlap with the + * existing resource. + */ + if (end < r->r_start || r->r_end < start) + return (EINVAL); + + /* + * Find the two resource regions immediately adjacent to the + * allocated resource. + */ + rm = r->r_rm; + mtx_lock(rm->rm_mtx); +#ifdef INVARIANTS + TAILQ_FOREACH(s, &rm->rm_list, r_link) { + if (s == r) + break; + } + if (s == NULL) + panic("resource not in list"); +#endif + s = TAILQ_PREV(r, resource_head, r_link); + t = TAILQ_NEXT(r, r_link); + KASSERT(s == NULL || s->r_end + 1 == r->r_start, + ("prev resource mismatch")); + KASSERT(t == NULL || r->r_end + 1 == t->r_start, + ("next resource mismatch")); + + /* + * See if the changes are permitted. Shrinking is always allowed, + * but growing requires sufficient room in the adjacent region. + */ + if (start < r->r_start && (s == NULL || (s->r_flags & RF_ALLOCATED) || + s->r_start > start)) { + mtx_unlock(rm->rm_mtx); + return (EBUSY); + } + if (end > r->r_end && (t == NULL || (t->r_flags & RF_ALLOCATED) || + t->r_end < end)) { + mtx_unlock(rm->rm_mtx); + return (EBUSY); + } + + /* + * While holding the lock, grow either end of the resource as + * needed and shrink either end if the shrinking does not require + * allocating a new resource. We can safely drop the lock and then + * insert a new range to handle the shrinking case afterwards. + */ + if (start < r->r_start || + (start > r->r_start && s != NULL && !(s->r_flags & RF_ALLOCATED))) { + KASSERT(s->r_flags == 0, ("prev is busy")); + r->r_start = start; + if (s->r_start == start) { + TAILQ_REMOVE(&rm->rm_list, s, r_link); + free(s, M_RMAN); + } else + s->r_end = start - 1; + } + if (end > r->r_end || + (end < r->r_end && t != NULL && !(t->r_flags & RF_ALLOCATED))) { + KASSERT(t->r_flags == 0, ("next is busy")); + r->r_end = end; + if (t->r_end == end) { + TAILQ_REMOVE(&rm->rm_list, t, r_link); + free(t, M_RMAN); + } else + t->r_start = end + 1; + } + mtx_unlock(rm->rm_mtx); + + /* + * Handle the shrinking cases that require allocating a new + * resource to hold the newly-free region. We have to recheck + * if we still need this new region after acquiring the lock. + */ + if (start > r->r_start) { + new = int_alloc_resource(M_WAITOK); + new->r_start = r->r_start; + new->r_end = start - 1; + new->r_rm = rm; + mtx_lock(rm->rm_mtx); + r->r_start = start; + s = TAILQ_PREV(r, resource_head, r_link); + if (s != NULL && !(s->r_flags & RF_ALLOCATED)) { + s->r_end = start - 1; + free(new, M_RMAN); + } else + TAILQ_INSERT_BEFORE(r, new, r_link); + mtx_unlock(rm->rm_mtx); + } + if (end < r->r_end) { + new = int_alloc_resource(M_WAITOK); + new->r_start = end + 1; + new->r_end = r->r_end; + new->r_rm = rm; + mtx_lock(rm->rm_mtx); + r->r_end = end; + t = TAILQ_NEXT(r, r_link); + if (t != NULL && !(t->r_flags & RF_ALLOCATED)) { + t->r_start = end + 1; + free(new, M_RMAN); + } else + TAILQ_INSERT_AFTER(&rm->rm_list, r, new, r_link); + mtx_unlock(rm->rm_mtx); + } + return (0); +} + struct resource * rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end, u_long count, u_long bound, u_int flags, @@ -679,6 +841,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r) * without freeing anything. 
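 *
 * (Editorial sketch of the lifecycle these routines, including the new
 * rman_adjust_resource(), support; the ranges are illustrative:
 *
 *	static struct rman rm;	zeroed, so rman_init() picks full range
 *
 *	rm.rm_type = RMAN_ARRAY;
 *	rm.rm_descr = "example memory window";
 *	rman_init(&rm);
 *	rman_manage_region(&rm, 0x000, 0x2ff);
 *	r = rman_reserve_resource(&rm, 0x100, 0x1ff, 0x100, 0, NULL);
 *	rman_adjust_resource(r, 0x100, 0x2ff);	grows into the free tail
 *	rman_adjust_resource(r, 0x100, 0x3ff);	EBUSY, past the region
 *	rman_release_resource(r);
 *
 * Growing succeeds only while the adjacent region is free and inside
 * the managed range, which is what the EBUSY checks in
 * rman_adjust_resource() above enforce.)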
*/ r->r_flags &= ~RF_ALLOCATED; + r->r_dev = NULL; return 0; } diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c index 165d17a3..c57392d6 100644 --- a/freebsd/sys/kern/subr_sbuf.c +++ b/freebsd/sys/kern/subr_sbuf.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #ifdef _KERNEL #include <sys/ctype.h> +#include <rtems/bsd/sys/errno.h> #include <sys/kernel.h> #include <sys/malloc.h> #include <sys/systm.h> @@ -42,6 +43,7 @@ __FBSDID("$FreeBSD$"); #include <machine/stdarg.h> #else /* _KERNEL */ #include <ctype.h> +#include <errno.h> #include <stdarg.h> #include <stdio.h> #include <stdlib.h> @@ -50,6 +52,12 @@ __FBSDID("$FreeBSD$"); #include <sys/sbuf.h> +struct sbuf_drain { + sbuf_drain_func *s_func; /* drain function */ + void *s_arg; /* user-supplied drain argument */ + int s_error; /* current error code */ +}; + #ifdef _KERNEL static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); #define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK) @@ -58,7 +66,6 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); #define KASSERT(e, m) #define SBMALLOC(size) malloc(size) #define SBFREE(buf) free(buf) -#define min(x,y) MIN(x,y) #endif /* _KERNEL */ /* @@ -118,18 +125,24 @@ _assert_sbuf_state(const char *fun, struct sbuf *s, int state) #endif /* _KERNEL && INVARIANTS */ +#ifdef CTASSERT +CTASSERT(powerof2(SBUF_MAXEXTENDSIZE)); +CTASSERT(powerof2(SBUF_MAXEXTENDINCR)); +#endif + static int sbuf_extendsize(int size) { int newsize; - newsize = SBUF_MINEXTENDSIZE; - while (newsize < size) { - if (newsize < (int)SBUF_MAXEXTENDSIZE) + if (size < (int)SBUF_MAXEXTENDSIZE) { + newsize = SBUF_MINEXTENDSIZE; + while (newsize < size) newsize *= 2; - else - newsize += SBUF_MAXEXTENDINCR; + } else { + newsize = roundup2(size, SBUF_MAXEXTENDINCR); } + KASSERT(newsize >= size, ("%s: %d < %d\n", __func__, newsize, size)); return (newsize); } @@ -186,11 +199,11 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags) s->s_flags = flags; } s->s_size = length; - if (buf) { + if (buf != NULL) { s->s_buf = buf; return (s); } - if (flags & SBUF_AUTOEXTEND) + if ((flags & SBUF_AUTOEXTEND) != 0) s->s_size = sbuf_extendsize(s->s_size); s->s_buf = SBMALLOC(s->s_size); if (s->s_buf == NULL) { @@ -243,6 +256,8 @@ sbuf_clear(struct sbuf *s) SBUF_CLEARFLAG(s, SBUF_FINISHED); SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); + if (s->s_drain != NULL) + s->s_drain->s_error = 0; s->s_len = 0; } @@ -269,27 +284,125 @@ sbuf_setpos(struct sbuf *s, int pos) } /* + * Set up a drain function and argument on an sbuf to flush data to + * when the sbuf buffer overflows. + */ +void +sbuf_set_drain(struct sbuf *s, sbuf_drain_func *func, void *ctx) +{ + + assert_sbuf_state(s, 0); + assert_sbuf_integrity(s); + KASSERT((s->s_drain != NULL && func == s->s_drain->s_func) || + s->s_len == 0, + ("Cannot change drain to %p on non-empty sbuf %p", func, s)); + if (func == NULL) { + SBFREE(s->s_drain); + s->s_drain = NULL; + return; + } + if (s->s_drain == NULL) { + s->s_drain = SBMALLOC(sizeof(*s->s_drain)); + if (s->s_drain == NULL) + return; + } + s->s_drain->s_func = func; + s->s_drain->s_arg = ctx; + s->s_drain->s_error = 0; +} + +/* + * Call the drain and process the return. 
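 *
 * (Editorial note on the contract, read off the assertions below: a
 * drain receives (arg, data, len) and must return the number of bytes
 * it consumed, or a negative errno; zero is not a legal return. The
 * simplest conforming drain just discards its input:
 *
 *	static int
 *	null_drain(void *arg, const char *data, int len)
 *	{
 *
 *		return (len);
 *	}
 *
 * installed with sbuf_set_drain(s, null_drain, NULL). The
 * sbuf_new_for_sysctl() addition in kern_sysctl.c above is the
 * in-tree user of this hook.)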
+ */ +static int +sbuf_drain(struct sbuf *s) +{ + int len; + + KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s)); + len = s->s_drain->s_func(s->s_drain->s_arg, s->s_buf, s->s_len); + if (len < 0) { + s->s_drain->s_error = -len; + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + return (s->s_drain->s_error); + } + KASSERT(len > 0 && len <= s->s_len, + ("Bad drain amount %d for sbuf %p", len, s)); + s->s_len -= len; + /* + * Fast path for the expected case where all the data was + * drained. + */ + if (s->s_len == 0) + return (0); + /* + * Move the remaining characters to the beginning of the + * string. + */ + memmove(s->s_buf, s->s_buf + len, s->s_len); + return (0); +} + +/* + * Append a byte to an sbuf. This is the core function for appending + * to an sbuf and is the main place that deals with extending the + * buffer and marking overflow. + */ +static void +sbuf_put_byte(int c, struct sbuf *s) +{ + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (SBUF_HASOVERFLOWED(s)) + return; + if (SBUF_FREESPACE(s) <= 0) { + /* + * If there is a drain, use it, otherwise extend the + * buffer. + */ + if (s->s_drain != NULL) + (void)sbuf_drain(s); + else if (sbuf_extend(s, 1) < 0) + SBUF_SETFLAG(s, SBUF_OVERFLOWED); + if (SBUF_HASOVERFLOWED(s)) + return; + } + s->s_buf[s->s_len++] = c; +} + +/* + * Append a non-NUL character to an sbuf. This prototype signature is + * suitable for use with kvprintf(9). + */ +static void +sbuf_putc_func(int c, void *arg) +{ + + if (c != '\0') + sbuf_put_byte(c, arg); +} + +/* * Append a byte string to an sbuf. */ int sbuf_bcat(struct sbuf *s, const void *buf, size_t len) { const char *str = buf; + const char *end = str + len; assert_sbuf_integrity(s); assert_sbuf_state(s, 0); if (SBUF_HASOVERFLOWED(s)) return (-1); - for (; len; len--) { - if (!SBUF_HASROOM(s) && sbuf_extend(s, len) < 0) - break; - s->s_buf[s->s_len++] = *str++; - } - if (len) { - SBUF_SETFLAG(s, SBUF_OVERFLOWED); - return (-1); - } + for (; str < end; str++) { + sbuf_put_byte(*str, s); + if (SBUF_HASOVERFLOWED(s)) + return (-1); + } return (0); } @@ -303,6 +416,8 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); + KASSERT(s->s_drain == NULL, + ("Nonsensical copyin to sbuf %p with a drain", s)); if (SBUF_HASOVERFLOWED(s)) return (-1); @@ -310,7 +425,8 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len) return (0); if (len > SBUF_FREESPACE(s)) { sbuf_extend(s, len - SBUF_FREESPACE(s)); - len = min(len, SBUF_FREESPACE(s)); + if (SBUF_FREESPACE(s) < len) + len = SBUF_FREESPACE(s); } if (copyin(uaddr, s->s_buf + s->s_len, len) != 0) return (-1); @@ -347,14 +463,10 @@ sbuf_cat(struct sbuf *s, const char *str) if (SBUF_HASOVERFLOWED(s)) return (-1); - while (*str) { - if (!SBUF_HASROOM(s) && sbuf_extend(s, strlen(str)) < 0) - break; - s->s_buf[s->s_len++] = *str++; - } - if (*str) { - SBUF_SETFLAG(s, SBUF_OVERFLOWED); - return (-1); + while (*str != '\0') { + sbuf_put_byte(*str++, s); + if (SBUF_HASOVERFLOWED(s)) + return (-1); } return (0); } @@ -370,6 +482,8 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); + KASSERT(s->s_drain == NULL, + ("Nonsensical copyin to sbuf %p with a drain", s)); if (SBUF_HASOVERFLOWED(s)) return (-1); @@ -378,7 +492,8 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len) len = SBUF_FREESPACE(s); /* XXX return 0? 
*/ if (len > SBUF_FREESPACE(s)) { sbuf_extend(s, len); - len = min(len, SBUF_FREESPACE(s)); + if (SBUF_FREESPACE(s) < len) + len = SBUF_FREESPACE(s); } switch (copyinstr(uaddr, s->s_buf + s->s_len, len + 1, &done)) { case ENAMETOOLONG: @@ -412,11 +527,28 @@ sbuf_cpy(struct sbuf *s, const char *str) /* * Format the given argument list and append the resulting string to an sbuf. */ +#if defined(_KERNEL) && !defined(__rtems__) +int +sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) +{ + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + KASSERT(fmt != NULL, + ("%s called with a NULL format string", __func__)); + + (void)kvprintf(fmt, sbuf_putc_func, s, 10, ap); + if (SBUF_HASOVERFLOWED(s)) + return (-1); + return (0); +} +#else /* !_KERNEL */ int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) { va_list ap_copy; - int len; + int error, len; assert_sbuf_integrity(s); assert_sbuf_state(s, 0); @@ -427,13 +559,32 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) if (SBUF_HASOVERFLOWED(s)) return (-1); + /* + * For the moment, there is no way to get vsnprintf(3) to hand + * back a character at a time, to push everything into + * sbuf_putc_func() as was done for the kernel. + * + * In userspace, while drains are useful, there's generally + * not a problem attempting to malloc(3) on out of space. So + * expand a userland sbuf if there is not enough room for the + * data produced by sbuf_[v]printf(3). + */ + + error = 0; do { va_copy(ap_copy, ap); len = vsnprintf(&s->s_buf[s->s_len], SBUF_FREESPACE(s) + 1, fmt, ap_copy); va_end(ap_copy); - } while (len > SBUF_FREESPACE(s) && - sbuf_extend(s, len - SBUF_FREESPACE(s)) == 0); + + if (SBUF_FREESPACE(s) >= len) + break; + /* Cannot print with the current available space. */ + if (s->s_drain != NULL && s->s_len > 0) + error = sbuf_drain(s); + else + error = sbuf_extend(s, len - SBUF_FREESPACE(s)); + } while (error == 0); /* * s->s_len is the length of the string, without the terminating nul. @@ -442,9 +593,11 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) * terminating nul. * * vsnprintf() returns the amount that would have been copied, - * given sufficient space, hence the min() calculation below. + * given sufficient space, so don't over-increment s_len. */ - s->s_len += min(len, SBUF_FREESPACE(s)); + if (SBUF_FREESPACE(s) < len) + len = SBUF_FREESPACE(s); + s->s_len += len; if (!SBUF_HASROOM(s) && !SBUF_CANEXTEND(s)) SBUF_SETFLAG(s, SBUF_OVERFLOWED); @@ -455,6 +608,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) return (-1); return (0); } +#endif /* _KERNEL */ /* * Format the given arguments and append the resulting string to an sbuf. @@ -478,17 +632,9 @@ int sbuf_putc(struct sbuf *s, int c) { - assert_sbuf_integrity(s); - assert_sbuf_state(s, 0); - + sbuf_putc_func(c, s); if (SBUF_HASOVERFLOWED(s)) return (-1); - if (!SBUF_HASROOM(s) && sbuf_extend(s, 1) < 0) { - SBUF_SETFLAG(s, SBUF_OVERFLOWED); - return (-1); - } - if (c != '\0') - s->s_buf[s->s_len++] = c; return (0); } @@ -501,11 +647,13 @@ sbuf_trim(struct sbuf *s) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); + KASSERT(s->s_drain == NULL, + ("%s makes no sense on sbuf %p with drain", __func__, s)); if (SBUF_HASOVERFLOWED(s)) return (-1); - while (s->s_len && isspace(s->s_buf[s->s_len-1])) + while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1])) --s->s_len; return (0); @@ -524,16 +672,32 @@ sbuf_overflowed(struct sbuf *s) /* * Finish off an sbuf. 
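 *
 * Since it now reports drain and overflow errors, callers should
 * check the return value; a minimal sketch (sb and n illustrative):
 *
 *	sbuf_printf(&sb, "%d entries", n);
 *	error = sbuf_finish(&sb);
 *
 * In the kernel, error is an errno value (ENOMEM on overflow, or the
 * drain's saved error); in userland the call keeps the traditional
 * -1 return and sets errno instead.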
*/ -void +int sbuf_finish(struct sbuf *s) { + int error = 0; assert_sbuf_integrity(s); assert_sbuf_state(s, 0); + if (s->s_drain != NULL) { + error = s->s_drain->s_error; + while (s->s_len > 0 && error == 0) + error = sbuf_drain(s); + } else if (SBUF_HASOVERFLOWED(s)) + error = ENOMEM; s->s_buf[s->s_len] = '\0'; SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); SBUF_SETFLAG(s, SBUF_FINISHED); +#ifdef _KERNEL + return (error); +#else + /*XXX*/if (error) { + errno = error; + return (-1); + } else + return (0); +#endif } /* @@ -545,6 +709,8 @@ sbuf_data(struct sbuf *s) assert_sbuf_integrity(s); assert_sbuf_state(s, SBUF_FINISHED); + KASSERT(s->s_drain == NULL, + ("%s makes no sense on sbuf %p with drain", __func__, s)); return (s->s_buf); } @@ -558,6 +724,8 @@ sbuf_len(struct sbuf *s) assert_sbuf_integrity(s); /* don't care if it's finished or not */ + KASSERT(s->s_drain == NULL, + ("%s makes no sense on sbuf %p with drain", __func__, s)); if (SBUF_HASOVERFLOWED(s)) return (-1); @@ -577,6 +745,8 @@ sbuf_delete(struct sbuf *s) if (SBUF_ISDYNAMIC(s)) SBFREE(s->s_buf); + if (s->s_drain != NULL) + SBFREE(s->s_drain); isdyn = SBUF_ISDYNSTRUCT(s); bzero(s, sizeof(*s)); if (isdyn) diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c index bbfe69e4..d2072082 100644 --- a/freebsd/sys/kern/subr_taskqueue.c +++ b/freebsd/sys/kern/subr_taskqueue.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/kthread.h> +#include <sys/limits.h> #include <rtems/bsd/sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> @@ -57,7 +58,6 @@ struct taskqueue_busy { struct taskqueue { STAILQ_HEAD(, task) tq_queue; - const char *tq_name; taskqueue_enqueue_fn tq_enqueue; void *tq_context; TAILQ_HEAD(, taskqueue_busy) tq_active; @@ -110,7 +110,7 @@ TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm, } static struct taskqueue * -_taskqueue_create(const char *name, int mflags, +_taskqueue_create(const char *name __unused, int mflags, taskqueue_enqueue_fn enqueue, void *context, int mtxflags, const char *mtxname) { @@ -122,7 +122,6 @@ _taskqueue_create(const char *name, int mflags, STAILQ_INIT(&queue->tq_queue); TAILQ_INIT(&queue->tq_active); - queue->tq_name = name; queue->tq_enqueue = enqueue; queue->tq_context = context; #ifndef __rtems__ @@ -186,7 +185,8 @@ taskqueue_enqueue(struct taskqueue *queue, struct task *task) * Count multiple enqueues. */ if (task->ta_pending) { - task->ta_pending++; + if (task->ta_pending < USHRT_MAX) + task->ta_pending++; TQ_UNLOCK(queue); return 0; } @@ -485,7 +485,7 @@ taskqueue_fast_run(void *dummy) } TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL, - swi_add(NULL, "Fast task queue", taskqueue_fast_run, NULL, + swi_add(NULL, "fast taskq", taskqueue_fast_run, NULL, SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih)); int diff --git a/freebsd/sys/kern/subr_unit.c b/freebsd/sys/kern/subr_unit.c index 1719ede8..386a470b 100644 --- a/freebsd/sys/kern/subr_unit.c +++ b/freebsd/sys/kern/subr_unit.c @@ -43,7 +43,7 @@ * * If a mutex is not provided when the unit number space is created, a * default global mutex is used. The advantage to passing a mutex in, is - * that the the alloc_unrl() function can be called with the mutex already + * that the alloc_unrl() function can be called with the mutex already * held (it will not be released by alloc_unrl()). * * The allocation function alloc_unr{l}() never sleeps (but it may block on @@ -54,7 +54,7 @@ * * A userland test program is included. 
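 *
 * A hedged sketch of typical usage (these are the interfaces declared
 * in this file; the bounds and the "foo" consumer are made up):
 *
 *	struct unrhdr *uh;
 *	int unit;
 *
 *	uh = new_unrhdr(0, 1023, NULL);	(NULL: use the global mutex)
 *	unit = alloc_unr(uh);		(-1 when the space is exhausted)
 *	... hand "unit" to the foo driver instance ...
 *	free_unr(uh, unit);
 *	delete_unrhdr(uh);		(legal only once all units are freed)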
* - * Memory usage is a very complex function of the the exact allocation + * Memory usage is a very complex function of the exact allocation * pattern, but always very compact: * * For the very typical case where a single unbroken run of unit * numbers are allocated 44 bytes are used on i386. @@ -65,7 +65,7 @@ * in the usermode test program included, the worst case usage * was 798 bytes on i386 for 5000 allocated and 5000 free units. * * The worst case is where every other unit number is allocated and - * the the rest are free. In that case 44 + N/4 bytes are used where + * the rest are free. In that case 44 + N/4 bytes are used where * N is the number of the highest unit allocated. */ @@ -630,6 +630,132 @@ alloc_unr(struct unrhdr *uh) return (i); } +static int +alloc_unr_specificl(struct unrhdr *uh, u_int item, void **p1, void **p2) +{ + struct unr *up, *upn; + struct unrb *ub; + u_int i, last, tl; + + mtx_assert(uh->mtx, MA_OWNED); + + if (item < uh->low + uh->first || item > uh->high) + return (-1); + + up = TAILQ_FIRST(&uh->head); + /* Ideal split. */ + if (up == NULL && item - uh->low == uh->first) { + uh->first++; + uh->last--; + uh->busy++; + check_unrhdr(uh, __LINE__); + return (item); + } + + i = item - uh->low - uh->first; + + if (up == NULL) { + up = new_unr(uh, p1, p2); + up->ptr = NULL; + up->len = i; + TAILQ_INSERT_TAIL(&uh->head, up, list); + up = new_unr(uh, p1, p2); + up->ptr = uh; + up->len = 1; + TAILQ_INSERT_TAIL(&uh->head, up, list); + uh->last = uh->high - uh->low - i; + uh->busy++; + check_unrhdr(uh, __LINE__); + return (item); + } else { + /* Find the item which contains the unit we want to allocate. */ + TAILQ_FOREACH(up, &uh->head, list) { + if (up->len > i) + break; + i -= up->len; + } + } + + if (up == NULL) { + if (i > 0) { + up = new_unr(uh, p1, p2); + up->ptr = NULL; + up->len = i; + TAILQ_INSERT_TAIL(&uh->head, up, list); + } + up = new_unr(uh, p1, p2); + up->ptr = uh; + up->len = 1; + TAILQ_INSERT_TAIL(&uh->head, up, list); + goto done; + } + + if (is_bitmap(uh, up)) { + ub = up->ptr; + if (bit_test(ub->map, i) == 0) { + bit_set(ub->map, i); + ub->busy++; + goto done; + } else + return (-1); + } else if (up->ptr == uh) + return (-1); + + KASSERT(up->ptr == NULL, + ("alloc_unr_specificl: up->ptr != NULL (up=%p)", up)); + + /* Split off the tail end, if any. */ + tl = up->len - (1 + i); + if (tl > 0) { + upn = new_unr(uh, p1, p2); + upn->ptr = NULL; + upn->len = tl; + TAILQ_INSERT_AFTER(&uh->head, up, upn, list); + } + + /* Split off head end, if any */ + if (i > 0) { + upn = new_unr(uh, p1, p2); + upn->len = i; + upn->ptr = NULL; + TAILQ_INSERT_BEFORE(up, upn, list); + } + up->len = 1; + up->ptr = uh; + +done: + last = uh->high - uh->low - (item - uh->low); + if (uh->last > last) + uh->last = last; + uh->busy++; + collapse_unr(uh, up); + check_unrhdr(uh, __LINE__); + return (item); +} + +int +alloc_unr_specific(struct unrhdr *uh, u_int item) +{ + void *p1, *p2; + int i; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "alloc_unr_specific"); + + p1 = Malloc(sizeof(struct unr)); + p2 = Malloc(sizeof(struct unr)); + + mtx_lock(uh->mtx); + i = alloc_unr_specificl(uh, item, &p1, &p2); + mtx_unlock(uh->mtx); + + if (p1 != NULL) + Free(p1); + if (p2 != NULL) + Free(p2); + + return (i); +} + /* * Free a unr. 
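 *
 * (Hedged usage note for the alloc_unr_specific() code above, not part
 * of the original comment: a caller that must own a fixed, well-known
 * unit, say unit 0 of some hypothetical device, pairs it with
 * free_unr():
 *
 *	if (alloc_unr_specific(uh, 0) == -1)
 *		(fail: unit 0 is busy or outside the low/high range)
 *	...
 *	free_unr(uh, 0);
 *
 * since alloc_unr_specific() returns the requested item on success and
 * -1 otherwise, as the code above shows.)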
 *
@@ -812,6 +938,42 @@ print_unrhdr(uh)
 }
 }
+static void
+test_alloc_unr(struct unrhdr *uh, u_int i, char a[])
+{
+ int j;
+
+ if (a[i]) {
+ printf("F %u\n", i);
+ free_unr(uh, i);
+ a[i] = 0;
+ } else {
+ no_alloc = 1;
+ j = alloc_unr(uh);
+ if (j != -1) {
+ a[j] = 1;
+ printf("A %d\n", j);
+ }
+ no_alloc = 0;
+ }
+}
+
+static void
+test_alloc_unr_specific(struct unrhdr *uh, u_int i, char a[])
+{
+ int j;
+
+ j = alloc_unr_specific(uh, i);
+ if (j == -1) {
+ printf("F %u\n", i);
+ a[i] = 0;
+ free_unr(uh, i);
+ } else {
+ a[i] = 1;
+ printf("A %d\n", j);
+ }
+}
+
 /* Number of unrs to test */
 #define NN 10000
@@ -827,6 +989,7 @@ main(int argc __unused, const char **argv __unused)
 print_unrhdr(uh);
 memset(a, 0, sizeof a);
+ srandomdev();
 fprintf(stderr, "sizeof(struct unr) %zu\n", sizeof(struct unr));
 fprintf(stderr, "sizeof(struct unrb) %zu\n", sizeof(struct unrb));
@@ -840,19 +1003,11 @@ main(int argc __unused, const char **argv __unused)
 if (a[i] && (j & 1))
 continue;
 #endif
- if (a[i]) {
- printf("F %u\n", i);
- free_unr(uh, i);
- a[i] = 0;
- } else {
- no_alloc = 1;
- i = alloc_unr(uh);
- if (i != -1) {
- a[i] = 1;
- printf("A %u\n", i);
- }
- no_alloc = 0;
- }
+ if ((random() & 1) != 0)
+ test_alloc_unr(uh, i, a);
+ else
+ test_alloc_unr_specific(uh, i, a);
+
 if (1) /* XXX: change this for detailed debug printout */
 print_unrhdr(uh);
 check_unrhdr(uh, __LINE__);
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index b0201d68..deda7afd 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -843,6 +843,54 @@ select(struct thread *td, struct select_args *uap)
 }
 #endif /* __rtems__ */
+/*
+ * In the unlikely case when the user-specified n is greater than the
+ * last open file descriptor, check that no bits are set after the
+ * last valid fd. We must return EBADF if any is set.
+ *
+ * There are applications that rely on this behaviour.
+ *
+ * nd is fd_lastfile + 1.
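+ *
+ * A hedged userland illustration of the behaviour being preserved
+ * (fd 100 stands in for any descriptor past the last open one):
+ *
+ *	fd_set set;
+ *
+ *	FD_ZERO(&set);
+ *	FD_SET(100, &set);	(fd 100 is not open)
+ *	select(101, &set, NULL, NULL, NULL);
+ *	(returns -1 with errno == EBADF)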
+ */
+static int
+select_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits)
+{
+ char *addr, *oaddr;
+ int b, i, res;
+ uint8_t bits;
+
+ if (nd >= ndu || fd_in == NULL)
+ return (0);
+
+ oaddr = NULL;
+ bits = 0; /* silence gcc */
+ for (i = nd; i < ndu; i++) {
+ b = i / NBBY;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ addr = (char *)fd_in + b;
+#else
+ addr = (char *)fd_in;
+ if (abi_nfdbits == NFDBITS) {
+ addr += rounddown(b, sizeof(fd_mask)) +
+ sizeof(fd_mask) - 1 - b % sizeof(fd_mask);
+ } else {
+ addr += rounddown(b, sizeof(uint32_t)) +
+ sizeof(uint32_t) - 1 - b % sizeof(uint32_t);
+ }
+#endif
+ if (addr != oaddr) {
+ res = fubyte(addr);
+ if (res == -1)
+ return (EFAULT);
+ oaddr = addr;
+ bits = res;
+ }
+ if ((bits & (1 << (i % NBBY))) != 0)
+ return (EBADF);
+ }
+ return (0);
+}
+
 int
 kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
@@ -857,20 +905,30 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
 fd_mask s_selbits[howmany(2048, NFDBITS)];
 fd_mask *ibits[3], *obits[3], *selbits, *sbp;
 struct timeval atv, rtv, ttv;
- int error, timo;
+ int error, lf, ndu, timo;
 u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
 if (nd < 0)
 return (EINVAL);
+ ndu = nd;
 #ifndef __rtems__
- fdp = td->td_proc->p_fd;
- if (nd > fdp->fd_lastfile + 1)
- nd = fdp->fd_lastfile + 1;
+ lf = fdp->fd_lastfile;
 #else /* __rtems__ */
 (void) fdp;
- if (nd > rtems_libio_number_iops)
- nd = rtems_libio_number_iops;
+ lf = rtems_libio_number_iops;
 #endif /* __rtems__ */
+ if (nd > lf + 1)
+ nd = lf + 1;
+
+ error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
+ error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
+ error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
 /*
 * Allocate just enough bits for the non-null fd_sets. Use the
@@ -1204,7 +1262,7 @@ rtems_bsd_poll(td, uap)
 struct pollfd *bits;
 struct pollfd smallbits[32];
 struct timeval atv, rtv, ttv;
- int error = 0, timo;
+ int error, timo;
 u_int nfds;
 size_t ni;
@@ -1549,6 +1607,23 @@ selfdfree(struct seltd *stp, struct selfd *sfp)
 uma_zfree(selfd_zone, sfp);
 }
+/* Drain the waiters tied to all the selfd belonging to the specified selinfo. */
+void
+seldrain(sip)
+ struct selinfo *sip;
+{
+
+ /*
+ * This feature is already provided by doselwakeup(), thus it is
+ * enough to call it.
+ * Eventually, the context should take care to avoid races
+ * between a thread calling select()/poll() and a file descriptor
+ * detaching, but, again, the races are just the same as for
+ * selwakeup().
+ */
+ doselwakeup(sip, -1);
+}
+
 /*
 * Record a select request.
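 *
 * (Hedged aside, not from the original comment: the classic consumer
 * of this interface is a driver poll method; foo_poll, foo_data_ready,
 * sc and sc->foo_rsel below are hypothetical:
 *
 *	static int
 *	foo_poll(struct cdev *dev, int events, struct thread *td)
 *	{
 *		int revents = 0;
 *
 *		if (foo_data_ready(sc))
 *			revents = events & (POLLIN | POLLRDNORM);
 *		else
 *			selrecord(td, &sc->foo_rsel);
 *		return (revents);
 *	}
 *
 * with selwakeup(&sc->foo_rsel) issued when data arrives and, with the
 * seldrain() addition above, seldrain(&sc->foo_rsel) issued at detach
 * time to flush any remaining waiters.)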
*/ diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c index 6bb4416e..e86698fa 100644 --- a/freebsd/sys/kern/sys_socket.c +++ b/freebsd/sys/kern/sys_socket.c @@ -87,9 +87,7 @@ soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred, if (error) return (error); #endif - CURVNET_SET(so->so_vnet); error = soreceive(so, 0, uio, 0, 0, 0); - CURVNET_RESTORE(); return (error); } #ifdef __rtems__ @@ -210,7 +208,6 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, struct socket *so = fp->f_data; int error = 0; - CURVNET_SET(so->so_vnet); switch (cmd) { case FIONBIO: SOCK_LOCK(so); @@ -297,14 +294,18 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, */ if (IOCGROUP(cmd) == 'i') error = ifioctl(so, cmd, data, td); - else if (IOCGROUP(cmd) == 'r') + else if (IOCGROUP(cmd) == 'r') { + CURVNET_SET(so->so_vnet); error = rtioctl_fib(cmd, data, so->so_fibnum); - else + CURVNET_RESTORE(); + } else { + CURVNET_SET(so->so_vnet); error = ((*so->so_proto->pr_usrreqs->pru_control) (so, cmd, data, 0, td)); + CURVNET_RESTORE(); + } break; } - CURVNET_RESTORE(); return (error); } #ifdef __rtems__ diff --git a/freebsd/sys/kern/uipc_domain.c b/freebsd/sys/kern/uipc_domain.c index 1b2be102..7a70e246 100644 --- a/freebsd/sys/kern/uipc_domain.c +++ b/freebsd/sys/kern/uipc_domain.c @@ -222,7 +222,7 @@ domain_add(void *data) domains = dp; KASSERT(domain_init_status >= 1, - ("attempt to net_add_domain(%s) before domaininit()", + ("attempt to domain_add(%s) before domaininit()", dp->dom_name)); #ifndef INVARIANTS if (domain_init_status < 1) diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c index 43255aab..f3b7c10d 100644 --- a/freebsd/sys/kern/uipc_mbuf.c +++ b/freebsd/sys/kern/uipc_mbuf.c @@ -282,7 +282,7 @@ mb_free_ext(struct mbuf *m) } /* - * Attach the the cluster from *m to *n, set up m_ext in *n + * Attach the cluster from *m to *n, set up m_ext in *n * and bump the refcount of the cluster. */ static void @@ -1412,6 +1412,11 @@ m_print(const struct mbuf *m, int maxlen) int pdata; const struct mbuf *m2; + if (m == NULL) { + printf("mbuf: %p\n", m); + return; + } + if (m->m_flags & M_PKTHDR) len = m->m_pkthdr.len; else diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index 48195f31..1ee7a831 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -534,9 +534,6 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m) SBLASTMBUFCHK(sb); - /* Remove all packet headers and mbuf tags to get a pure data chain. */ - m_demote(m, 1); - sbcompress(sb, m, sb->sb_mbtail); sb->sb_lastrecord = sb->sb_mb; diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 473c3df0..5b3b9eef 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -94,6 +94,12 @@ * from a listen queue to a file descriptor, in order to prevent garbage * collection of the socket at an untimely moment. For a number of reasons, * these interfaces are not preferred, and should be avoided. + * + * NOTE: With regard to VNETs the general rule is that callers do not set + * curvnet. Exceptions to this rule include soabort(), sodisconnect(), + * sofree() (and with that sorele(), sotryfree()), as well as sonewconn() + * and sorflush(), which are usually called from a pre-set VNET context. + * sopoll() currently does not need a VNET context to be set. 
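+ *
+ * A minimal sketch of the convention (pru_foo stands in for any
+ * pr_usrreqs method; solisten() and soaccept() below are real
+ * instances):
+ *
+ *	CURVNET_SET(so->so_vnet);
+ *	error = (*so->so_proto->pr_usrreqs->pru_foo)(so, ...);
+ *	CURVNET_RESTORE();
+ *	return (error);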
*/ #include <sys/cdefs.h> @@ -168,6 +174,10 @@ int maxsockets; MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); +#define VNET_SO_ASSERT(so) \ + VNET_ASSERT(curvnet != NULL, \ + ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); + static int somaxconn = SOMAXCONN; static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); /* XXX: we dont have SYSCTL_USHORT */ @@ -294,6 +304,8 @@ soalloc(struct vnet *vnet) so->so_gencnt = ++so_gencnt; ++numopensockets; #ifdef VIMAGE + VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p", + __func__, __LINE__, so)); vnet->vnet_sockcnt++; so->so_vnet = vnet; #endif @@ -317,6 +329,8 @@ sodealloc(struct socket *so) so->so_gencnt = ++so_gencnt; --numopensockets; /* Could be below, but faster here. */ #ifdef VIMAGE + VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p", + __func__, __LINE__, so)); so->so_vnet->vnet_sockcnt--; #endif mtx_unlock(&so_global_mtx); @@ -377,6 +391,7 @@ socreate(int dom, struct socket **aso, int type, int proto, so->so_type = type; so->so_cred = crhold(cred); if ((prp->pr_domain->dom_family == PF_INET) || + (prp->pr_domain->dom_family == PF_INET6) || (prp->pr_domain->dom_family == PF_ROUTE)) #ifndef __rtems__ so->so_fibnum = td->td_proc->p_fibnum; @@ -440,7 +455,8 @@ sonewconn(struct socket *head, int connstatus) if (over) #endif return (NULL); - VNET_ASSERT(head->so_vnet); + VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", + __func__, __LINE__, head)); so = soalloc(head->so_vnet); if (so == NULL) return (NULL); @@ -459,6 +475,7 @@ sonewconn(struct socket *head, int connstatus) #endif knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); + VNET_SO_ASSERT(head); if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { sodealloc(so); @@ -533,8 +550,12 @@ sobind(struct socket *so, struct sockaddr *nam, struct thread *td) int solisten(struct socket *so, int backlog, struct thread *td) { + int error; - return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td)); + CURVNET_SET(so->so_vnet); + error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td); + CURVNET_RESTORE(); + return error; } int @@ -617,11 +638,12 @@ sofree(struct socket *so) so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP)); if (so->so_options & SO_ACCEPTCONN) { KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated")); - KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated")); + KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated")); } SOCK_UNLOCK(so); ACCEPT_UNLOCK(); + VNET_SO_ASSERT(so); if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb); if (pr->pr_usrreqs->pru_detach != NULL) @@ -643,6 +665,8 @@ sofree(struct socket *so) */ sbdestroy(&so->so_snd, so); sbdestroy(&so->so_rcv, so); + seldrain(&so->so_snd.sb_sel); + seldrain(&so->so_rcv.sb_sel); knlist_destroy(&so->so_rcv.sb_sel.si_note); knlist_destroy(&so->so_snd.sb_sel.si_note); sodealloc(so); @@ -751,6 +775,7 @@ soabort(struct socket *so) KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF")); KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP")); KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP")); + VNET_SO_ASSERT(so); if (so->so_proto->pr_usrreqs->pru_abort != NULL) (*so->so_proto->pr_usrreqs->pru_abort)(so); @@ -768,7 +793,10 @@ 
soaccept(struct socket *so, struct sockaddr **nam) KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF")); so->so_state &= ~SS_NOFDREF; SOCK_UNLOCK(so); + + CURVNET_SET(so->so_vnet); error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam); + CURVNET_RESTORE(); return (error); } @@ -806,8 +834,12 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td) int soconnect2(struct socket *so1, struct socket *so2) { + int error; - return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2)); + CURVNET_SET(so1->so_vnet); + error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2); + CURVNET_RESTORE(); + return (error); } int @@ -819,6 +851,7 @@ sodisconnect(struct socket *so) return (ENOTCONN); if (so->so_state & SS_ISDISCONNECTING) return (EALREADY); + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so); return (error); } @@ -1084,6 +1117,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, * there are probably other places that this also happens. We must * rethink this. */ + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* @@ -1273,6 +1307,7 @@ restart: * places that this also happens. We must rethink * this. */ + VNET_SO_ASSERT(so); error = (*so->so_proto->pr_usrreqs->pru_send)(so, (flags & MSG_OOB) ? PRUS_OOB : /* @@ -1339,6 +1374,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags) int error; KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0")); + VNET_SO_ASSERT(so); m = m_get(M_WAIT, MT_DATA); error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK); @@ -1447,8 +1483,10 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, if (mp != NULL) *mp = NULL; if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING) - && uio->uio_resid) + && uio->uio_resid) { + VNET_SO_ASSERT(so); (*pr->pr_usrreqs->pru_rcvd)(so, 0); + } error = sblock(&so->so_rcv, SBLOCKWAIT(flags)); if (error) @@ -1461,17 +1499,11 @@ restart: * If we have less data than requested, block awaiting more (subject * to any timeout) if: * 1. the current count is less than the low water mark, or - * 2. MSG_WAITALL is set, and it is possible to do the entire - * receive operation at once if we block (resid <= hiwat). - * 3. MSG_DONTWAIT is not set - * If MSG_WAITALL is set but resid is larger than the receive buffer, - * we have to do the receive in sections, and thus risk returning a - * short count if a timeout or signal occurs after we start. + * 2. MSG_DONTWAIT is not set */ if (m == NULL || (((flags & MSG_DONTWAIT) == 0 && so->so_rcv.sb_cc < uio->uio_resid) && - (so->so_rcv.sb_cc < so->so_rcv.sb_lowat || - ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) && + so->so_rcv.sb_cc < so->so_rcv.sb_lowat && m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) { KASSERT(m != NULL || !so->so_rcv.sb_cc, ("receive: m == %p so->so_rcv.sb_cc == %u", @@ -1597,6 +1629,7 @@ dontblock: cm->m_next = NULL; if (pr->pr_domain->dom_externalize != NULL) { SOCKBUF_UNLOCK(&so->so_rcv); + VNET_SO_ASSERT(so); error = (*pr->pr_domain->dom_externalize) (cm, controlp); SOCKBUF_LOCK(&so->so_rcv); @@ -1661,8 +1694,8 @@ dontblock: * examined ('type'), end the receive operation. 
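 *
 * (Illustrative restatement of the updated check below, not from the
 * original comment: a receive that began on ordinary data stops at
 * the first MT_OOBDATA or MT_CONTROL mbuf it meets, and one that
 * began on out-of-band or control data stops at the first mbuf of
 * any other type, so the different kinds of data are never mixed
 * within a single operation.)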
*/
 SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (m->m_type == MT_OOBDATA) {
- if (type != MT_OOBDATA)
+ if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
+ if (type != m->m_type)
 break;
 } else if (type == MT_OOBDATA)
 break;
@@ -1812,15 +1845,22 @@ dontblock:
 */
 if (pr->pr_flags & PR_WANTRCVD) {
 SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
 SOCKBUF_LOCK(&so->so_rcv);
 }
 SBLASTRECORDCHK(&so->so_rcv);
 SBLASTMBUFCHK(&so->so_rcv);
- error = sbwait(&so->so_rcv);
- if (error) {
- SOCKBUF_UNLOCK(&so->so_rcv);
- goto release;
+ /*
+ * We could have received some data while we were
+ * notifying the protocol. Skip blocking in this case.
+ */
+ if (so->so_rcv.sb_mb == NULL) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ goto release;
+ }
 }
 m = so->so_rcv.sb_mb;
 if (m != NULL)
@@ -1858,6 +1898,7 @@ dontblock:
 if (!(flags & MSG_SOCALLBCK) &&
 (pr->pr_flags & PR_WANTRCVD)) {
 SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
 (*pr->pr_usrreqs->pru_rcvd)(so, flags);
 SOCKBUF_LOCK(&so->so_rcv);
 }
@@ -2055,6 +2096,7 @@ deliver:
 (((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
 !(flags & MSG_SOCALLBCK))) {
 SOCKBUF_UNLOCK(sb);
+ VNET_SO_ASSERT(so);
 (*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
 SOCKBUF_LOCK(sb);
 }
@@ -2266,9 +2308,13 @@ int
 soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
 struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
 {
+ int error;
- return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
+ CURVNET_SET(so->so_vnet);
+ error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
 controlp, flagsp));
+ CURVNET_RESTORE();
+ return (error);
 }
 int
@@ -2279,17 +2325,19 @@ soshutdown(struct socket *so, int how)
 if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
 return (EINVAL);
+
+ CURVNET_SET(so->so_vnet);
 if (pr->pr_usrreqs->pru_flush != NULL) {
 (*pr->pr_usrreqs->pru_flush)(so, how);
 }
 if (how != SHUT_WR)
 sorflush(so);
 if (how != SHUT_RD) {
- CURVNET_SET(so->so_vnet);
 error = (*pr->pr_usrreqs->pru_shutdown)(so);
 CURVNET_RESTORE();
 return (error);
 }
+ CURVNET_RESTORE();
 return (0);
 }
@@ -2300,6 +2348,8 @@ sorflush(struct socket *so)
 struct protosw *pr = so->so_proto;
 struct sockbuf asb;
+ VNET_SO_ASSERT(so);
+
 /*
 * In order to avoid calling dom_dispose with the socket buffer mutex
 * held, and in order to generally avoid holding the lock for a long
@@ -2313,7 +2363,6 @@ sorflush(struct socket *so)
 * socket buffer. Don't let our acquire be interrupted by a signal
 * despite any existing socket disposition on interruptable waiting.
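 *
 * (Condensed, hedged sketch of the scheme, not a verbatim copy of the
 * code below:
 *
 *	sblock(sb, SBL_WAIT | SBL_NOINTR);	(uninterruptible acquire)
 *	SOCKBUF_LOCK(sb);
 *	asb = *sb;				(snapshot onto the stack)
 *	(zero the live buffer's contents)
 *	SOCKBUF_UNLOCK(sb);
 *	sbunlock(sb);
 *	(*pr->pr_domain->dom_dispose)(asb.sb_mb);	(no sockbuf lock held)
 *	sbrelease_internal(&asb, so);
 *
 * so any rights in the buffer are disposed of without the mutex held.)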
*/ - CURVNET_SET(so->so_vnet); socantrcvmore(so); (void) sblock(sb, SBL_WAIT | SBL_NOINTR); @@ -2337,7 +2386,6 @@ sorflush(struct socket *so) if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL) (*pr->pr_domain->dom_dispose)(asb.sb_mb); sbrelease_internal(&asb, so); - CURVNET_RESTORE(); } /* @@ -2401,11 +2449,14 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif + CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); + if (so->so_proto->pr_ctloutput != NULL) { + error = (*so->so_proto->pr_ctloutput)(so, sopt); + CURVNET_RESTORE(); + return (error); + } error = ENOPROTOOPT; } else { switch (sopt->sopt_name) { @@ -2458,15 +2509,16 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); - if (optval < 1 || optval > rt_numfibs) { + if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } - if ((so->so_proto->pr_domain->dom_family == PF_INET) || - (so->so_proto->pr_domain->dom_family == PF_ROUTE)) { + if (((so->so_proto->pr_domain->dom_family == PF_INET) || + (so->so_proto->pr_domain->dom_family == PF_INET6) || + (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { so->so_fibnum = optval; /* Note: ignore error */ - if (so->so_proto && so->so_proto->pr_ctloutput) + if (so->so_proto->pr_ctloutput) (*so->so_proto->pr_ctloutput)(so, sopt); } else { so->so_fibnum = 0; @@ -2584,13 +2636,11 @@ sosetopt(struct socket *so, struct sockopt *sopt) error = ENOPROTOOPT; break; } - if (error == 0 && so->so_proto != NULL && - so->so_proto->pr_ctloutput != NULL) { - (void) ((*so->so_proto->pr_ctloutput) - (so, sopt)); - } + if (error == 0 && so->so_proto->pr_ctloutput != NULL) + (void)(*so->so_proto->pr_ctloutput)(so, sopt); } bad: + CURVNET_RESTORE(); return (error); } @@ -2634,13 +2684,15 @@ sogetopt(struct socket *so, struct sockopt *sopt) struct mac extmac; #endif + CURVNET_SET(so->so_vnet); error = 0; if (sopt->sopt_level != SOL_SOCKET) { - if (so->so_proto && so->so_proto->pr_ctloutput) { - return ((*so->so_proto->pr_ctloutput) - (so, sopt)); - } else - return (ENOPROTOOPT); + if (so->so_proto->pr_ctloutput != NULL) + error = (*so->so_proto->pr_ctloutput)(so, sopt); + else + error = ENOPROTOOPT; + CURVNET_RESTORE(); + return (error); } else { switch (sopt->sopt_name) { #ifdef INET @@ -2677,6 +2729,10 @@ integer: optval = so->so_type; goto integer; + case SO_PROTOCOL: + optval = so->so_proto->pr_protocol; + goto integer; + case SO_ERROR: SOCK_LOCK(so); optval = so->so_error; @@ -2724,11 +2780,11 @@ integer: error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) - return (error); + goto bad; error = mac_getsockopt_label(sopt->sopt_td->td_ucred, so, &extmac); if (error) - return (error); + goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; @@ -2740,11 +2796,11 @@ integer: error = sooptcopyin(sopt, &extmac, sizeof(extmac), sizeof(extmac)); if (error) - return (error); + goto bad; error = mac_getsockopt_peerlabel( sopt->sopt_td->td_ucred, so, &extmac); if (error) - return (error); + goto bad; error = sooptcopyout(sopt, &extmac, sizeof extmac); #else error = EOPNOTSUPP; @@ -2767,8 +2823,12 @@ integer: error = ENOPROTOOPT; break; } - return (error); } +#ifdef MAC +bad: +#endif + CURVNET_RESTORE(); + return (error); } /* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. 
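 *
 * (Hedged sketch of the copy helpers used throughout sosetopt() and
 * sogetopt() above; SO_FOO and foo_val are hypothetical:
 *
 *	case SO_FOO:
 *		error = sooptcopyin(sopt, &foo_val, sizeof foo_val,
 *		    sizeof foo_val);
 *		if (error)
 *			goto bad;
 *		(apply foo_val)
 *		break;
 *
 * with sooptcopyout(sopt, &foo_val, sizeof foo_val) as the mirror on
 * the get side, much as the SO_SETFIB and SO_PROTOCOL cases above
 * demonstrate; the mbuf-based helpers that this comment introduces
 * serve the older, pre-3.0 protocol entry points.)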
*/
@@ -2902,6 +2962,10 @@ sopoll(struct socket *so, int events, struct ucred *active_cred,
 struct thread *td)
 {
+ /*
+ * We do not need to set or assert curvnet as long as everyone uses
+ * sopoll_generic().
+ */
 return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
 td));
 }
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 5ee11b0c..960aae87 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -368,14 +368,9 @@ rtems_bsd_listen(td, uap)
 so = fp->f_data;
 #ifdef MAC
 error = mac_socket_check_listen(td->td_ucred, so);
- if (error == 0) {
+ if (error == 0)
 #endif
- CURVNET_SET(so->so_vnet);
 error = solisten(so, uap->backlog, td);
- CURVNET_RESTORE();
-#ifdef MAC
- }
-#endif
 fdrop(fp, td);
 }
 return(error);
@@ -591,9 +586,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
 tmp = fflag & FASYNC;
 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
 sa = 0;
- CURVNET_SET(so->so_vnet);
 error = soaccept(so, &sa);
- CURVNET_RESTORE();
 if (error) {
 /*
 * return a namelen of zero for older code which might
@@ -953,6 +946,10 @@ kern_sendit(td, s, mp, flags, control, segflg)
 return (error);
 so = (struct socket *)fp->f_data;
+#ifdef KTRACE
+ if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
+ ktrsockaddr(mp->msg_name);
+#endif
 #ifdef MAC
 if (mp->msg_name != NULL) {
 error = mac_socket_check_connect(td->td_ucred, so,
@@ -1253,11 +1250,9 @@ kern_recvit(td, s, mp, fromseg, controlp)
 ktruio = cloneuio(&auio);
 #endif
 len = auio.uio_resid;
- CURVNET_SET(so->so_vnet);
 error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
 (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
 &mp->msg_flags);
- CURVNET_RESTORE();
 if (error) {
 if (auio.uio_resid != (int)len && (error == ERESTART ||
 error == EINTR || error == EWOULDBLOCK))
@@ -1720,9 +1715,7 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 if (error == 0) {
 so = fp->f_data;
- CURVNET_SET(so->so_vnet);
 error = sosetopt(so, &sopt);
- CURVNET_RESTORE();
 fdrop(fp, td);
 }
 return(error);
@@ -1834,9 +1827,7 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
 error = getsock(td->td_proc->p_fd, s, &fp, NULL);
 if (error == 0) {
 so = fp->f_data;
- CURVNET_SET(so->so_vnet);
 error = sogetopt(so, &sopt);
- CURVNET_RESTORE();
 *valsize = sopt.sopt_valsize;
 fdrop(fp, td);
 }
@@ -2609,11 +2600,17 @@ retry_space:
 }
 /*
- * Get a sendfile buf. We usually wait as long
- * as necessary, but this wait can be interrupted.
+ * Get a sendfile buf. When allocating the
+ * first buffer for the mbuf chain, we usually
+ * wait as long as necessary, but this wait
+ * can be interrupted. For subsequent
+ * buffers, do not sleep, since several
+ * threads might exhaust the buffers and then
+ * deadlock.
 */
- if ((sf = sf_buf_alloc(pg,
- (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
+ sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
+ SFB_CATCH);
+ if (sf == NULL) {
 mbstat.sf_allocfail++;
 vm_page_lock_queues();
 vm_page_unwire(pg, 0);
@@ -2623,7 +2620,8 @@ retry_space:
 if (pg->wire_count == 0 && pg->object == NULL)
 vm_page_free(pg);
 vm_page_unlock_queues();
- error = (mnw ? EAGAIN : EINTR);
+ if (m == NULL)
+ error = (mnw ?
EAGAIN : EINTR); break; } @@ -2783,9 +2781,13 @@ sctp_peeloff(td, uap) error = fgetsock(td, uap->sd, &head, &fflag); if (error) goto done2; + if (head->so_proto->pr_protocol != IPPROTO_SCTP) { + error = EOPNOTSUPP; + goto done; + } error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name); if (error) - goto done2; + goto done; /* * At this point we know we do have a assoc to pull * we proceed to get the fd setup. This may block @@ -2901,6 +2903,10 @@ sctp_generic_sendmsg (td, uap) iov[0].iov_len = uap->mlen; so = (struct socket *)fp->f_data; + if (so->so_proto->pr_protocol != IPPROTO_SCTP) { + error = EOPNOTSUPP; + goto sctp_bad; + } #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error) @@ -3011,6 +3017,10 @@ sctp_generic_sendmsg_iov(td, uap) #endif so = (struct socket *)fp->f_data; + if (so->so_proto->pr_protocol != IPPROTO_SCTP) { + error = EOPNOTSUPP; + goto sctp_bad; + } #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error) @@ -3115,6 +3125,10 @@ sctp_generic_recvmsg(td, uap) goto out1; so = fp->f_data; + if (so->so_proto->pr_protocol != IPPROTO_SCTP) { + error = EOPNOTSUPP; + goto out; + } #ifdef MAC error = mac_socket_check_receive(td->td_ucred, so); if (error) { |
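/*
 * (Hedged illustration of the IPPROTO_SCTP guards added to the sctp_*
 * entry points above: invoked on a socket of any other protocol they
 * now fail up front with EOPNOTSUPP instead of reaching into
 * SCTP-specific state, e.g.
 *
 *	s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
 *	sctp_peeloff(s, assoc_id);	(returns -1, errno == EOPNOTSUPP)
 *
 * where s and assoc_id are illustrative.)
 */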