Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r--  freebsd/sys/kern/init_main.c       |   4
-rw-r--r--  freebsd/sys/kern/kern_event.c      |  18
-rw-r--r--  freebsd/sys/kern/kern_hhook.c      | 456
-rw-r--r--  freebsd/sys/kern/kern_intr.c       |  32
-rw-r--r--  freebsd/sys/kern/kern_khelp.c      | 475
-rw-r--r--  freebsd/sys/kern/kern_linker.c     | 110
-rw-r--r--  freebsd/sys/kern/kern_mib.c        |   3
-rw-r--r--  freebsd/sys/kern/kern_osd.c        | 405
-rw-r--r--  freebsd/sys/kern/kern_subr.c       | 144
-rw-r--r--  freebsd/sys/kern/kern_sysctl.c     | 294
-rw-r--r--  freebsd/sys/kern/kern_timeout.c    | 524
-rw-r--r--  freebsd/sys/kern/subr_bus.c        | 251
-rw-r--r--  freebsd/sys/kern/subr_kobj.c       |  75
-rw-r--r--  freebsd/sys/kern/subr_rman.c       | 163
-rw-r--r--  freebsd/sys/kern/subr_sbuf.c       | 256
-rw-r--r--  freebsd/sys/kern/subr_taskqueue.c  |  10
-rw-r--r--  freebsd/sys/kern/subr_unit.c       | 187
-rw-r--r--  freebsd/sys/kern/sys_generic.c     |  89
-rw-r--r--  freebsd/sys/kern/sys_socket.c      |  13
-rw-r--r--  freebsd/sys/kern/uipc_domain.c     |   2
-rw-r--r--  freebsd/sys/kern/uipc_mbuf.c       |   7
-rw-r--r--  freebsd/sys/kern/uipc_sockbuf.c    |   3
-rw-r--r--  freebsd/sys/kern/uipc_socket.c     | 154
-rw-r--r--  freebsd/sys/kern/uipc_syscalls.c   |  54
24 files changed, 3113 insertions(+), 616 deletions(-)
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index 84b44410..604a7f2b 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -321,6 +321,7 @@ print_version(void *data __unused)
while (len > 0 && version[len - 1] == '\n')
len--;
printf("%.*s %s\n", len, version, machine);
+ printf("%s\n", compiler_version);
}
SYSINIT(announce, SI_SUB_COPYRIGHT, SI_ORDER_FIRST, print_caddr_t,
@@ -394,6 +395,7 @@ struct sysentvec null_sysvec = {
.sv_set_syscall_retval = null_set_syscall_retval,
.sv_fetch_syscall_args = null_fetch_syscall_args,
.sv_syscallnames = NULL,
+ .sv_schedtail = NULL,
};
#endif /* __rtems__ */
@@ -478,7 +480,7 @@ proc0_init(void *dummy __unused)
td->td_user_pri = PUSER;
td->td_base_user_pri = PUSER;
td->td_priority = PVM;
- td->td_base_pri = PUSER;
+ td->td_base_pri = PVM;
td->td_oncpu = 0;
td->td_flags = TDF_INMEM|TDP_KTHREAD;
td->td_cpuset = cpuset_thread0();
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index d318c8ba..62498b25 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -530,6 +530,10 @@ knote_fork(struct knlist *list, int pid)
}
#endif /* __rtems__ */
+/*
+ * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the
+ * interval timer support code.
+ */
static int
timertoticks(intptr_t data)
{
@@ -543,7 +547,6 @@ timertoticks(intptr_t data)
return tticks;
}
-/* XXX - move to kern_timeout.c? */
static void
filt_timerexpire(void *knx)
{
@@ -553,9 +556,16 @@ filt_timerexpire(void *knx)
kn->kn_data++;
KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */
+ /*
+ * timertoticks() uses tvtohz() which always adds 1 to allow
+ * for the time until the next clock interrupt being strictly
+ * less than 1 clock tick. We don't want that here since we
+ * want to appear to be in sync with the clock interrupt even
+ * when we're delayed.
+ */
if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) {
calloutp = (struct callout *)kn->kn_hook;
- callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata),
+ callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1,
filt_timerexpire, kn);
}
}
@@ -563,7 +573,6 @@ filt_timerexpire(void *knx)
/*
* data contains amount of time to sleep, in milliseconds
*/
-/* XXX - move to kern_timeout.c? */
static int
filt_timerattach(struct knote *kn)
{
@@ -587,7 +596,6 @@ filt_timerattach(struct knote *kn)
return (0);
}
-/* XXX - move to kern_timeout.c? */
static void
filt_timerdetach(struct knote *kn)
{
@@ -600,7 +608,6 @@ filt_timerdetach(struct knote *kn)
kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */
}
-/* XXX - move to kern_timeout.c? */
static int
filt_timer(struct knote *kn, long hint)
{
@@ -1851,6 +1858,7 @@ kqueue_close(struct file *fp, struct thread *td)
rtems_libio_unlock();
#endif /* __rtems__ */
+ seldrain(&kq->kq_sel);
knlist_destroy(&kq->kq_sel.si_note);
mtx_destroy(&kq->kq_lock);
#ifndef __rtems__
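For context, the EVFILT_TIMER filter adjusted above is driven through the ordinary kqueue(2)/kevent(2) interface, with the interval supplied in milliseconds as the filt_timerattach() comment states. A minimal userland-style sketch of that usage (standard FreeBSD API, not part of this change set):

#include <sys/event.h>
#include <err.h>
#include <stdio.h>

int
main(void)
{
	struct kevent kev, ev;
	int kq;

	if ((kq = kqueue()) == -1)
		err(1, "kqueue");
	/* data is the period in milliseconds; the kernel converts it with timertoticks(). */
	EV_SET(&kev, 1, EVFILT_TIMER, EV_ADD | EV_ENABLE, 0, 500, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		err(1, "kevent register");
	/* kn_data counts expirations and is reported back in ev.data. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) == 1)
		printf("timer fired %ld time(s)\n", (long)ev.data);
	return (0);
}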
diff --git a/freebsd/sys/kern/kern_hhook.c b/freebsd/sys/kern/kern_hhook.c
new file mode 100644
index 00000000..3a5503a6
--- /dev/null
+++ b/freebsd/sys/kern/kern_hhook.c
@@ -0,0 +1,456 @@
+#include <machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Lawrence Stewart while studying at the Centre
+ * for Advanced Internet Architectures, Swinburne University of Technology,
+ * made possible in part by grants from the FreeBSD Foundation and Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/hhook.h>
+#include <sys/khelp.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/module_khelp.h>
+#include <sys/osd.h>
+#include <sys/queue.h>
+#include <sys/refcount.h>
+#include <sys/systm.h>
+
+#include <net/vnet.h>
+
+struct hhook {
+ hhook_func_t hhk_func;
+ struct helper *hhk_helper;
+ void *hhk_udata;
+ STAILQ_ENTRY(hhook) hhk_next;
+};
+
+static MALLOC_DEFINE(M_HHOOK, "hhook", "Helper hooks are linked off hhook_head lists");
+
+LIST_HEAD(hhookheadhead, hhook_head);
+VNET_DEFINE(struct hhookheadhead, hhook_head_list);
+#define V_hhook_head_list VNET(hhook_head_list)
+
+static struct mtx hhook_head_list_lock;
+MTX_SYSINIT(hhookheadlistlock, &hhook_head_list_lock, "hhook_head list lock",
+ MTX_DEF);
+
+/* Private function prototypes. */
+static void hhook_head_destroy(struct hhook_head *hhh);
+
+#define HHHLIST_LOCK() mtx_lock(&hhook_head_list_lock)
+#define HHHLIST_UNLOCK() mtx_unlock(&hhook_head_list_lock)
+#define HHHLIST_LOCK_ASSERT() mtx_assert(&hhook_head_list_lock, MA_OWNED)
+
+#define HHH_LOCK_INIT(hhh) rm_init(&(hhh)->hhh_lock, "hhook_head rm lock")
+#define HHH_LOCK_DESTROY(hhh) rm_destroy(&(hhh)->hhh_lock)
+#define HHH_WLOCK(hhh) rm_wlock(&(hhh)->hhh_lock)
+#define HHH_WUNLOCK(hhh) rm_wunlock(&(hhh)->hhh_lock)
+#define HHH_RLOCK(hhh, rmpt) rm_rlock(&(hhh)->hhh_lock, (rmpt))
+#define HHH_RUNLOCK(hhh, rmpt) rm_runlock(&(hhh)->hhh_lock, (rmpt))
+
+/*
+ * Run all helper hook functions for a given hook point.
+ */
+void
+hhook_run_hooks(struct hhook_head *hhh, void *ctx_data, struct osd *hosd)
+{
+ struct hhook *hhk;
+ void *hdata;
+ struct rm_priotracker rmpt;
+
+ KASSERT(hhh->hhh_refcount > 0, ("hhook_head %p refcount is 0", hhh));
+
+ HHH_RLOCK(hhh, &rmpt);
+ STAILQ_FOREACH(hhk, &hhh->hhh_hooks, hhk_next) {
+ if (hhk->hhk_helper->h_flags & HELPER_NEEDS_OSD) {
+ hdata = osd_get(OSD_KHELP, hosd, hhk->hhk_helper->h_id);
+ if (hdata == NULL)
+ continue;
+ } else
+ hdata = NULL;
+
+ /*
+ * XXXLAS: We currently ignore the int returned by the hook,
+ * but will likely want to handle it in future to allow hhook to
+ * be used like pfil and effect changes at the hhook calling
+ * site e.g. we could define a new hook type of HHOOK_TYPE_PFIL
+ * and standardise what particular return values mean and set
+ * the context data to pass exactly the same information as pfil
+ * hooks currently receive, thus replicating pfil with hhook.
+ */
+ hhk->hhk_func(hhh->hhh_type, hhh->hhh_id, hhk->hhk_udata,
+ ctx_data, hdata, hosd);
+ }
+ HHH_RUNLOCK(hhh, &rmpt);
+}
+
+/*
+ * Register a new helper hook function with a helper hook point.
+ */
+int
+hhook_add_hook(struct hhook_head *hhh, struct hookinfo *hki, uint32_t flags)
+{
+ struct hhook *hhk, *tmp;
+ int error;
+
+ error = 0;
+
+ if (hhh == NULL)
+ return (ENOENT);
+
+ hhk = malloc(sizeof(struct hhook), M_HHOOK,
+ M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT));
+
+ if (hhk == NULL)
+ return (ENOMEM);
+
+ hhk->hhk_helper = hki->hook_helper;
+ hhk->hhk_func = hki->hook_func;
+ hhk->hhk_udata = hki->hook_udata;
+
+ HHH_WLOCK(hhh);
+ STAILQ_FOREACH(tmp, &hhh->hhh_hooks, hhk_next) {
+ if (tmp->hhk_func == hki->hook_func &&
+ tmp->hhk_udata == hki->hook_udata) {
+ /* The helper hook function is already registered. */
+ error = EEXIST;
+ break;
+ }
+ }
+
+ if (!error) {
+ STAILQ_INSERT_TAIL(&hhh->hhh_hooks, hhk, hhk_next);
+ hhh->hhh_nhooks++;
+ } else
+ free(hhk, M_HHOOK);
+
+ HHH_WUNLOCK(hhh);
+
+ return (error);
+}
+
+/*
+ * Lookup a helper hook point and register a new helper hook function with it.
+ */
+int
+hhook_add_hook_lookup(struct hookinfo *hki, uint32_t flags)
+{
+ struct hhook_head *hhh;
+ int error;
+
+ hhh = hhook_head_get(hki->hook_type, hki->hook_id);
+
+ if (hhh == NULL)
+ return (ENOENT);
+
+ error = hhook_add_hook(hhh, hki, flags);
+ hhook_head_release(hhh);
+
+ return (error);
+}
+
+/*
+ * Remove a helper hook function from a helper hook point.
+ */
+int
+hhook_remove_hook(struct hhook_head *hhh, struct hookinfo *hki)
+{
+ struct hhook *tmp;
+
+ if (hhh == NULL)
+ return (ENOENT);
+
+ HHH_WLOCK(hhh);
+ STAILQ_FOREACH(tmp, &hhh->hhh_hooks, hhk_next) {
+ if (tmp->hhk_func == hki->hook_func &&
+ tmp->hhk_udata == hki->hook_udata) {
+ STAILQ_REMOVE(&hhh->hhh_hooks, tmp, hhook, hhk_next);
+ free(tmp, M_HHOOK);
+ hhh->hhh_nhooks--;
+ break;
+ }
+ }
+ HHH_WUNLOCK(hhh);
+
+ return (0);
+}
+
+/*
+ * Lookup a helper hook point and remove a helper hook function from it.
+ */
+int
+hhook_remove_hook_lookup(struct hookinfo *hki)
+{
+ struct hhook_head *hhh;
+
+ hhh = hhook_head_get(hki->hook_type, hki->hook_id);
+
+ if (hhh == NULL)
+ return (ENOENT);
+
+ hhook_remove_hook(hhh, hki);
+ hhook_head_release(hhh);
+
+ return (0);
+}
+
+/*
+ * Register a new helper hook point.
+ */
+int
+hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hhh,
+ uint32_t flags)
+{
+ struct hhook_head *tmphhh;
+
+ tmphhh = hhook_head_get(hhook_type, hhook_id);
+
+ if (tmphhh != NULL) {
+ /* Hook point previously registered. */
+ hhook_head_release(tmphhh);
+ return (EEXIST);
+ }
+
+ /* XXXLAS: Need to implement support for non-virtualised hooks. */
+ if ((flags & HHOOK_HEADISINVNET) == 0) {
+ printf("%s: only vnet-style virtualised hooks can be used\n",
+ __func__);
+ return (EINVAL);
+ }
+
+ tmphhh = malloc(sizeof(struct hhook_head), M_HHOOK,
+ M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT));
+
+ if (tmphhh == NULL)
+ return (ENOMEM);
+
+ tmphhh->hhh_type = hhook_type;
+ tmphhh->hhh_id = hhook_id;
+ tmphhh->hhh_nhooks = 0;
+ STAILQ_INIT(&tmphhh->hhh_hooks);
+ HHH_LOCK_INIT(tmphhh);
+
+ if (hhh != NULL)
+ refcount_init(&tmphhh->hhh_refcount, 1);
+ else
+ refcount_init(&tmphhh->hhh_refcount, 0);
+
+ if (flags & HHOOK_HEADISINVNET) {
+ tmphhh->hhh_flags |= HHH_ISINVNET;
+ HHHLIST_LOCK();
+ LIST_INSERT_HEAD(&V_hhook_head_list, tmphhh, hhh_next);
+ HHHLIST_UNLOCK();
+ } else {
+ /* XXXLAS: Add tmphhh to the non-virtualised list. */
+ }
+
+ *hhh = tmphhh;
+
+ return (0);
+}
+
+static void
+hhook_head_destroy(struct hhook_head *hhh)
+{
+ struct hhook *tmp, *tmp2;
+
+ HHHLIST_LOCK_ASSERT();
+
+ LIST_REMOVE(hhh, hhh_next);
+ HHH_WLOCK(hhh);
+ STAILQ_FOREACH_SAFE(tmp, &hhh->hhh_hooks, hhk_next, tmp2)
+ free(tmp, M_HHOOK);
+ HHH_WUNLOCK(hhh);
+ HHH_LOCK_DESTROY(hhh);
+ free(hhh, M_HHOOK);
+}
+
+/*
+ * Remove a helper hook point.
+ */
+int
+hhook_head_deregister(struct hhook_head *hhh)
+{
+ int error;
+
+ error = 0;
+
+ HHHLIST_LOCK();
+ if (hhh == NULL)
+ error = ENOENT;
+ else if (hhh->hhh_refcount > 1)
+ error = EBUSY;
+ else
+ hhook_head_destroy(hhh);
+ HHHLIST_UNLOCK();
+
+ return (error);
+}
+
+/*
+ * Remove a helper hook point via a hhook_head lookup.
+ */
+int
+hhook_head_deregister_lookup(int32_t hhook_type, int32_t hhook_id)
+{
+ struct hhook_head *hhh;
+ int error;
+
+ hhh = hhook_head_get(hhook_type, hhook_id);
+ error = hhook_head_deregister(hhh);
+
+ if (error == EBUSY)
+ hhook_head_release(hhh);
+
+ return (error);
+}
+
+/*
+ * Lookup and return the hhook_head struct associated with the specified type
+ * and id, or NULL if not found. If found, the hhook_head's refcount is bumped.
+ */
+struct hhook_head *
+hhook_head_get(int32_t hhook_type, int32_t hhook_id)
+{
+ struct hhook_head *hhh;
+
+ /* XXXLAS: Pick hhook_head_list based on hhook_head flags. */
+ HHHLIST_LOCK();
+ LIST_FOREACH(hhh, &V_hhook_head_list, hhh_next) {
+ if (hhh->hhh_type == hhook_type && hhh->hhh_id == hhook_id) {
+ refcount_acquire(&hhh->hhh_refcount);
+ break;
+ }
+ }
+ HHHLIST_UNLOCK();
+
+ return (hhh);
+}
+
+void
+hhook_head_release(struct hhook_head *hhh)
+{
+
+ refcount_release(&hhh->hhh_refcount);
+}
+
+/*
+ * Check the hhook_head private flags and return the appropriate public
+ * representation of the flag to the caller. The function is implemented in a
+ * way that allows us to cope with other subsystems becoming virtualised in the
+ * future.
+ */
+uint32_t
+hhook_head_is_virtualised(struct hhook_head *hhh)
+{
+ uint32_t ret;
+
+ ret = 0;
+
+ if (hhh != NULL) {
+ if (hhh->hhh_flags & HHH_ISINVNET)
+ ret = HHOOK_HEADISINVNET;
+ }
+
+ return (ret);
+}
+
+uint32_t
+hhook_head_is_virtualised_lookup(int32_t hook_type, int32_t hook_id)
+{
+ struct hhook_head *hhh;
+ uint32_t ret;
+
+ hhh = hhook_head_get(hook_type, hook_id);
+
+ if (hhh == NULL)
+ return (0);
+
+ ret = hhook_head_is_virtualised(hhh);
+ hhook_head_release(hhh);
+
+ return (ret);
+}
+
+/*
+ * Vnet created and being initialised.
+ */
+static void
+hhook_vnet_init(const void *unused __unused)
+{
+
+ LIST_INIT(&V_hhook_head_list);
+}
+
+/*
+ * Vnet being torn down and destroyed.
+ */
+static void
+hhook_vnet_uninit(const void *unused __unused)
+{
+ struct hhook_head *hhh, *tmphhh;
+
+ /*
+ * If subsystems which export helper hook points use the hhook KPI
+ * correctly, the loop below should have no work to do because the
+ * subsystem should have already called hhook_head_deregister().
+ */
+ HHHLIST_LOCK();
+ LIST_FOREACH_SAFE(hhh, &V_hhook_head_list, hhh_next, tmphhh) {
+ printf("%s: hhook_head type=%d, id=%d cleanup required\n",
+ __func__, hhh->hhh_type, hhh->hhh_id);
+ hhook_head_destroy(hhh);
+ }
+ HHHLIST_UNLOCK();
+}
+
+
+/*
+ * When a vnet is created and being initialised, init the V_hhook_head_list.
+ */
+VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+ hhook_vnet_init, NULL);
+
+/*
+ * The hhook KPI provides a mechanism for subsystems which export helper hook
+ * points to clean up on vnet tear down, but in case the KPI is misused,
+ * provide a function to clean up and free memory for a vnet being destroyed.
+ */
+VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST,
+ hhook_vnet_uninit, NULL);
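kern_hhook.c introduces the helper hook (hhook) KPI that kern_khelp.c below builds on. For orientation, a subsystem that wants to expose a hook point registers a hhook_head and then fires it at the relevant place; a minimal sketch, where the type/id constants and function names are hypothetical and only the hhook calls shown above are assumed (hook functions are normally attached through the Khelp framework rather than directly):

#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/hhook.h>
#include <sys/osd.h>

/* Hypothetical hook point identifiers, for illustration only. */
#define EXAMPLE_HHOOK_TYPE	1
#define EXAMPLE_HHOOK_ID	0

static struct hhook_head *example_hhh;

static void
example_subsystem_init(void)
{
	/*
	 * Export a vnet-virtualised hook point; this version of
	 * hhook_head_register() only accepts HHOOK_HEADISINVNET heads.
	 */
	if (hhook_head_register(EXAMPLE_HHOOK_TYPE, EXAMPLE_HHOOK_ID,
	    &example_hhh, HHOOK_HEADISINVNET | HHOOK_WAITOK) != 0)
		printf("example: hhook_head_register failed\n");
}

static void
example_subsystem_event(void *ctx_data, struct osd *hosd)
{
	/* Run every helper hook function currently attached to the point. */
	hhook_run_hooks(example_hhh, ctx_data, hosd);
}

static void
example_subsystem_fini(void)
{
	/* Fails with EBUSY while other code still holds a reference. */
	(void)hhook_head_deregister(example_hhh);
}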
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index b23ce519..336866c8 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -148,22 +148,18 @@ intr_priority(enum intr_type flags)
INTR_TYPE_CAM | INTR_TYPE_MISC | INTR_TYPE_CLK | INTR_TYPE_AV);
switch (flags) {
case INTR_TYPE_TTY:
- pri = PI_TTYLOW;
+ pri = PI_TTY;
break;
case INTR_TYPE_BIO:
- /*
- * XXX We need to refine this. BSD/OS distinguishes
- * between tape and disk priorities.
- */
pri = PI_DISK;
break;
case INTR_TYPE_NET:
pri = PI_NET;
break;
case INTR_TYPE_CAM:
- pri = PI_DISK; /* XXX or PI_CAM? */
+ pri = PI_DISK;
break;
- case INTR_TYPE_AV: /* Audio/video */
+ case INTR_TYPE_AV:
pri = PI_AV;
break;
case INTR_TYPE_CLK:
@@ -202,6 +198,9 @@ ithread_update(struct intr_thread *ithd)
/* Update name and priority. */
strlcpy(td->td_name, ie->ie_fullname, sizeof(td->td_name));
+#ifdef KTR
+ sched_clear_tdname(td);
+#endif
thread_lock(td);
#ifndef __rtems__
sched_prio(td, pri);
@@ -1118,6 +1117,7 @@ int
swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
void *arg, int pri, enum intr_type flags, void **cookiep)
{
+ struct thread *td;
struct intr_event *ie;
int error;
@@ -1138,16 +1138,15 @@ swi_add(struct intr_event **eventp, const char *name, driver_intr_t handler,
*eventp = ie;
}
error = intr_event_add_handler(ie, name, NULL, handler, arg,
- (pri * RQ_PPQ) + PI_SOFT, flags, cookiep);
+ PI_SWI(pri), flags, cookiep);
if (error)
return (error);
#ifndef __rtems__
if (pri == SWI_CLOCK) {
- struct proc *p;
- p = ie->ie_thread->it_thread->td_proc;
- PROC_LOCK(p);
- p->p_flag |= P_NOLOAD;
- PROC_UNLOCK(p);
+ td = ie->ie_thread->it_thread;
+ thread_lock(td);
+ td->td_flags |= TDF_NOLOAD;
+ thread_unlock(td);
}
#else /* __rtems__ */
// Do _not_ ignore the thread in the load average
@@ -1742,18 +1741,13 @@ db_dump_intrhand(struct intr_handler *ih)
case PI_AV:
db_printf("AV ");
break;
- case PI_TTYHIGH:
- case PI_TTYLOW:
+ case PI_TTY:
db_printf("TTY ");
break;
- case PI_TAPE:
- db_printf("TAPE");
- break;
case PI_NET:
db_printf("NET ");
break;
case PI_DISK:
- case PI_DISKLOW:
db_printf("DISK");
break;
case PI_DULL:
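The swi_add() change above derives software-interrupt thread priorities with PI_SWI(pri) instead of (pri * RQ_PPQ) + PI_SOFT, and marks the clock SWI thread with TDF_NOLOAD on the thread rather than P_NOLOAD on the process. A rough sketch of a typical caller of this KPI; SWI_TQ and swi_sched() are assumed from the wider interrupt API and do not appear in this hunk:

#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/interrupt.h>

static void *example_swi_cookie;

static void
example_swi_handler(void *arg)
{
	/* Runs later, in a software-interrupt thread. */
}

static void
example_swi_setup(void)
{
	int error;

	/* Passing a NULL eventp lets swi_add() create a private event. */
	error = swi_add(NULL, "example", example_swi_handler, NULL,
	    SWI_TQ, 0, &example_swi_cookie);
	if (error != 0)
		printf("example: swi_add failed (%d)\n", error);
}

static void
example_swi_raise(void)
{
	/* Ask the ithread framework to run the handler soon. */
	swi_sched(example_swi_cookie, 0);
}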
diff --git a/freebsd/sys/kern/kern_khelp.c b/freebsd/sys/kern/kern_khelp.c
new file mode 100644
index 00000000..ce8dd662
--- /dev/null
+++ b/freebsd/sys/kern/kern_khelp.c
@@ -0,0 +1,475 @@
+#include <machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org>
+ * Copyright (c) 2010 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * This software was developed by Lawrence Stewart while studying at the Centre
+ * for Advanced Internet Architectures, Swinburne University of Technology,
+ * made possible in part by grants from the FreeBSD Foundation and Cisco
+ * University Research Program Fund at Community Foundation Silicon Valley.
+ *
+ * Portions of this software were developed at the Centre for Advanced
+ * Internet Architectures, Swinburne University of Technology, Melbourne,
+ * Australia by Lawrence Stewart under sponsorship from the FreeBSD Foundation.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/hhook.h>
+#include <sys/jail.h>
+#include <sys/khelp.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/malloc.h>
+#include <sys/module.h>
+#include <sys/module_khelp.h>
+#include <sys/osd.h>
+#include <sys/queue.h>
+#include <sys/refcount.h>
+#include <sys/rwlock.h>
+#include <sys/systm.h>
+
+#include <net/vnet.h>
+
+static struct rwlock khelp_list_lock;
+RW_SYSINIT(khelplistlock, &khelp_list_lock, "helper list lock");
+
+static TAILQ_HEAD(helper_head, helper) helpers = TAILQ_HEAD_INITIALIZER(helpers);
+
+/* Private function prototypes. */
+static inline void khelp_remove_osd(struct helper *h, struct osd *hosd);
+
+#define KHELP_LIST_WLOCK() rw_wlock(&khelp_list_lock)
+#define KHELP_LIST_WUNLOCK() rw_wunlock(&khelp_list_lock)
+#define KHELP_LIST_RLOCK() rw_rlock(&khelp_list_lock)
+#define KHELP_LIST_RUNLOCK() rw_runlock(&khelp_list_lock)
+#define KHELP_LIST_LOCK_ASSERT() rw_assert(&khelp_list_lock, RA_LOCKED)
+
+int
+khelp_register_helper(struct helper *h)
+{
+ struct helper *tmph;
+ int error, i, inserted;
+
+ error = 0;
+ inserted = 0;
+ refcount_init(&h->h_refcount, 0);
+ h->h_id = osd_register(OSD_KHELP, NULL, NULL);
+
+ /* It's only safe to add the hooks after osd_register(). */
+ if (h->h_nhooks > 0) {
+ for (i = 0; i < h->h_nhooks && !error; i++) {
+ /* We don't require the module to assign hook_helper. */
+ h->h_hooks[i].hook_helper = h;
+ error = khelp_add_hhook(&h->h_hooks[i], HHOOK_NOWAIT);
+ }
+
+ if (error) {
+ for (i--; i >= 0; i--)
+ khelp_remove_hhook(&h->h_hooks[i]);
+
+ osd_deregister(OSD_KHELP, h->h_id);
+ }
+ }
+
+ if (!error) {
+ KHELP_LIST_WLOCK();
+ /*
+ * Keep list of helpers sorted in descending h_id order. Due to
+ * the way osd_set() works, a sorted list ensures
+ * init_helper_osd() will operate with improved efficiency.
+ */
+ TAILQ_FOREACH(tmph, &helpers, h_next) {
+ if (tmph->h_id < h->h_id) {
+ TAILQ_INSERT_BEFORE(tmph, h, h_next);
+ inserted = 1;
+ break;
+ }
+ }
+
+ if (!inserted)
+ TAILQ_INSERT_TAIL(&helpers, h, h_next);
+ KHELP_LIST_WUNLOCK();
+ }
+
+ return (error);
+}
+
+int
+khelp_deregister_helper(struct helper *h)
+{
+ struct helper *tmph;
+ int error, i;
+
+ error = 0;
+
+ KHELP_LIST_WLOCK();
+ if (h->h_refcount > 0)
+ error = EBUSY;
+ else {
+ error = ENOENT;
+ TAILQ_FOREACH(tmph, &helpers, h_next) {
+ if (tmph == h) {
+ TAILQ_REMOVE(&helpers, h, h_next);
+ error = 0;
+ break;
+ }
+ }
+ }
+ KHELP_LIST_WUNLOCK();
+
+ if (!error) {
+ if (h->h_nhooks > 0) {
+ for (i = 0; i < h->h_nhooks; i++)
+ khelp_remove_hhook(&h->h_hooks[i]);
+ }
+ osd_deregister(OSD_KHELP, h->h_id);
+ }
+
+ return (error);
+}
+
+int
+khelp_init_osd(uint32_t classes, struct osd *hosd)
+{
+ struct helper *h;
+ void *hdata;
+ int error;
+
+ KASSERT(hosd != NULL, ("struct osd not initialised!"));
+
+ error = 0;
+
+ KHELP_LIST_RLOCK();
+ TAILQ_FOREACH(h, &helpers, h_next) {
+ /* If helper is correct class and needs to store OSD... */
+ if (h->h_classes & classes && h->h_flags & HELPER_NEEDS_OSD) {
+ hdata = uma_zalloc(h->h_zone, M_NOWAIT);
+ if (hdata == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ osd_set(OSD_KHELP, hosd, h->h_id, hdata);
+ refcount_acquire(&h->h_refcount);
+ }
+ }
+
+ if (error) {
+ /* Delete OSD that was assigned prior to the error. */
+ TAILQ_FOREACH(h, &helpers, h_next) {
+ if (h->h_classes & classes)
+ khelp_remove_osd(h, hosd);
+ }
+ }
+ KHELP_LIST_RUNLOCK();
+
+ return (error);
+}
+
+int
+khelp_destroy_osd(struct osd *hosd)
+{
+ struct helper *h;
+ int error;
+
+ KASSERT(hosd != NULL, ("struct osd not initialised!"));
+
+ error = 0;
+
+ KHELP_LIST_RLOCK();
+ /*
+ * Clean up all khelp related OSD.
+ *
+ * XXXLAS: Would be nice to use something like osd_exit() here but it
+ * doesn't have the right semantics for this purpose.
+ */
+ TAILQ_FOREACH(h, &helpers, h_next)
+ khelp_remove_osd(h, hosd);
+ KHELP_LIST_RUNLOCK();
+
+ return (error);
+}
+
+static inline void
+khelp_remove_osd(struct helper *h, struct osd *hosd)
+{
+ void *hdata;
+
+ if (h->h_flags & HELPER_NEEDS_OSD) {
+ /*
+ * If the current helper uses OSD and calling osd_get()
+ * on the helper's h_id returns non-NULL, the helper has
+ * OSD attached to 'hosd' which needs to be cleaned up.
+ */
+ hdata = osd_get(OSD_KHELP, hosd, h->h_id);
+ if (hdata != NULL) {
+ uma_zfree(h->h_zone, hdata);
+ osd_del(OSD_KHELP, hosd, h->h_id);
+ refcount_release(&h->h_refcount);
+ }
+ }
+}
+
+void *
+khelp_get_osd(struct osd *hosd, int32_t id)
+{
+
+ return (osd_get(OSD_KHELP, hosd, id));
+}
+
+int32_t
+khelp_get_id(char *hname)
+{
+ struct helper *h;
+ int32_t id;
+
+ id = -1;
+
+ KHELP_LIST_RLOCK();
+ TAILQ_FOREACH(h, &helpers, h_next) {
+ if (strncmp(h->h_name, hname, HELPER_NAME_MAXLEN) == 0) {
+ id = h->h_id;
+ break;
+ }
+ }
+ KHELP_LIST_RUNLOCK();
+
+ return (id);
+}
+
+int
+khelp_add_hhook(struct hookinfo *hki, uint32_t flags)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ int error;
+
+ error = 0;
+
+ /*
+ * XXXLAS: If a helper is dynamically adding a helper hook function at
+ * runtime using this function, we should update the helper's h_hooks
+ * struct member to include the additional hookinfo struct.
+ */
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ error = hhook_add_hook_lookup(hki, flags);
+ CURVNET_RESTORE();
+#ifdef VIMAGE
+ if (error)
+ break;
+#endif
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+
+ return (error);
+}
+
+int
+khelp_remove_hhook(struct hookinfo *hki)
+{
+ VNET_ITERATOR_DECL(vnet_iter);
+ int error;
+
+ error = 0;
+
+ /*
+ * XXXLAS: If a helper is dynamically removing a helper hook function at
+ * runtime using this function, we should update the helper's h_hooks
+ * struct member to remove the defunct hookinfo struct.
+ */
+
+ VNET_LIST_RLOCK_NOSLEEP();
+ VNET_FOREACH(vnet_iter) {
+ CURVNET_SET(vnet_iter);
+ error = hhook_remove_hook_lookup(hki);
+ CURVNET_RESTORE();
+#ifdef VIMAGE
+ if (error)
+ break;
+#endif
+ }
+ VNET_LIST_RUNLOCK_NOSLEEP();
+
+ return (error);
+}
+
+#ifndef __rtems__
+int
+khelp_modevent(module_t mod, int event_type, void *data)
+{
+ struct khelp_modevent_data *kmd;
+ int error;
+
+ kmd = (struct khelp_modevent_data *)data;
+ error = 0;
+
+ switch(event_type) {
+ case MOD_LOAD:
+ if (kmd->helper->h_flags & HELPER_NEEDS_OSD) {
+ if (kmd->uma_zsize <= 0) {
+ printf("Use KHELP_DECLARE_MOD_UMA() instead!\n");
+ error = EDOOFUS;
+ break;
+ }
+ kmd->helper->h_zone = uma_zcreate(kmd->name,
+ kmd->uma_zsize, kmd->umactor, kmd->umadtor, NULL,
+ NULL, 0, 0);
+ if (kmd->helper->h_zone == NULL) {
+ error = ENOMEM;
+ break;
+ }
+ }
+ strlcpy(kmd->helper->h_name, kmd->name, HELPER_NAME_MAXLEN);
+ kmd->helper->h_hooks = kmd->hooks;
+ kmd->helper->h_nhooks = kmd->nhooks;
+ if (kmd->helper->mod_init != NULL)
+ error = kmd->helper->mod_init();
+ if (!error)
+ error = khelp_register_helper(kmd->helper);
+ break;
+
+ case MOD_QUIESCE:
+ case MOD_SHUTDOWN:
+ case MOD_UNLOAD:
+ error = khelp_deregister_helper(kmd->helper);
+ if (!error) {
+ if (kmd->helper->h_flags & HELPER_NEEDS_OSD)
+ uma_zdestroy(kmd->helper->h_zone);
+ if (kmd->helper->mod_destroy != NULL)
+ kmd->helper->mod_destroy();
+ } else if (error == ENOENT)
+ /* Do nothing and allow unload if helper not in list. */
+ error = 0;
+ else if (error == EBUSY)
+ printf("Khelp module \"%s\" can't unload until its "
+ "refcount drops from %d to 0.\n", kmd->name,
+ kmd->helper->h_refcount);
+ break;
+
+ default:
+ error = EINVAL;
+ break;
+ }
+
+ return (error);
+}
+#endif /* __rtems__ */
+
+/*
+ * This function is called in two separate situations:
+ *
+ * - When the kernel is booting, it is called directly by the SYSINIT framework
+ * to allow Khelp modules which were compiled into the kernel or loaded by the
+ * boot loader to insert their non-virtualised hook functions into the kernel.
+ *
+ * - When the kernel is booting or a vnet is created, this function is also
+ * called indirectly through khelp_vnet_init() by the vnet initialisation code.
+ * In this situation, Khelp modules are able to insert their virtualised hook
+ * functions into the virtualised hook points in the vnet which is being
+ * initialised. In the case where the kernel is not compiled with "options
+ * VIMAGE", this step is still run once at boot, but the hook functions get
+ * transparently inserted into the standard unvirtualised network stack.
+ */
+static void
+khelp_init(const void *vnet)
+{
+ struct helper *h;
+ int error, i, vinit;
+ int32_t htype, hid;
+
+ error = 0;
+ vinit = vnet != NULL;
+
+ KHELP_LIST_RLOCK();
+ TAILQ_FOREACH(h, &helpers, h_next) {
+ for (i = 0; i < h->h_nhooks && !error; i++) {
+ htype = h->h_hooks[i].hook_type;
+ hid = h->h_hooks[i].hook_id;
+
+ /*
+ * If we're doing a virtualised init (vinit != 0) and
+ * the hook point is virtualised, or we're doing a plain
+ * sysinit at boot and the hook point is not
+ * virtualised, insert the hook.
+ */
+ if ((hhook_head_is_virtualised_lookup(htype, hid) ==
+ HHOOK_HEADISINVNET && vinit) ||
+ (!hhook_head_is_virtualised_lookup(htype, hid) &&
+ !vinit)) {
+ error = hhook_add_hook_lookup(&h->h_hooks[i],
+ HHOOK_NOWAIT);
+ }
+ }
+
+ if (error) {
+ /* Remove any helper's hooks we successfully added. */
+ for (i--; i >= 0; i--)
+ hhook_remove_hook_lookup(&h->h_hooks[i]);
+
+ printf("%s: Failed to add hooks for helper \"%s\" (%p)",
+ __func__, h->h_name, h);
+ if (vinit)
+ printf(" to vnet %p.\n", vnet);
+ else
+ printf(".\n");
+
+ error = 0;
+ }
+ }
+ KHELP_LIST_RUNLOCK();
+}
+
+/*
+ * Vnet created and being initialised.
+ */
+static void
+khelp_vnet_init(const void *unused __unused)
+{
+
+ khelp_init(TD_TO_VNET(curthread));
+}
+
+
+/*
+ * As the kernel boots, allow Khelp modules which were compiled into the kernel
+ * or loaded by the boot loader to insert their non-virtualised hook functions
+ * into the kernel.
+ */
+SYSINIT(khelp_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, khelp_init, NULL);
+
+/*
+ * When a vnet is created and being initialised, we need to insert the helper
+ * hook functions for all currently registered Khelp modules into the vnet's
+ * helper hook points. The hhook KPI provides a mechanism for subsystems which
+ * export helper hook points to clean up on vnet shutdown, so we don't need a
+ * VNET_SYSUNINIT for Khelp.
+ */
+VNET_SYSINIT(khelp_vnet_init, SI_SUB_PROTO_END, SI_ORDER_FIRST,
+ khelp_vnet_init, NULL);
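kern_khelp.c layers the Khelp (kernel helper) module framework on top of hhook and OSD: a module describes itself with a struct helper plus an array of hookinfo entries and registers with khelp_register_helper(), which reserves an OSD slot via osd_register() and attaches the hooks in every vnet. A minimal sketch using only the fields referenced above; the hook identifiers and HELPER_CLASS_TCP are assumptions, and real modules would normally use the KHELP_DECLARE_MOD() macros from module_khelp.h instead:

#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/hhook.h>
#include <sys/khelp.h>
#include <sys/osd.h>

/* Signature matches the invocation made from hhook_run_hooks(). */
static int
example_helper_hook(int32_t type, int32_t id, void *udata, void *ctx_data,
    void *hdata, struct osd *hosd)
{
	/* Inspect or adjust ctx_data here; the return value is ignored for now. */
	return (0);
}

/* Hypothetical hook point identifiers, for illustration only. */
static struct hookinfo example_helper_hooks[] = {
	{
		.hook_type = 1,
		.hook_id = 0,
		.hook_udata = NULL,
		.hook_func = example_helper_hook,
	},
};

static struct helper example_helper = {
	.h_name = "example",
	.h_flags = 0,			/* HELPER_NEEDS_OSD for per-object data */
	.h_classes = HELPER_CLASS_TCP,	/* assumed class constant from khelp.h */
	.h_hooks = example_helper_hooks,
	.h_nhooks = sizeof(example_helper_hooks) / sizeof(example_helper_hooks[0]),
};

static void
example_helper_load(void)
{
	/* Assigns h_id via osd_register() and hooks every existing vnet. */
	if (khelp_register_helper(&example_helper) != 0)
		printf("example: khelp_register_helper failed\n");
}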
diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c
index d2318552..90c73a70 100644
--- a/freebsd/sys/kern/kern_linker.c
+++ b/freebsd/sys/kern/kern_linker.c
@@ -71,6 +71,9 @@ int kld_debug = 0;
#define KLD_LOCK() sx_xlock(&kld_sx)
#define KLD_UNLOCK() sx_xunlock(&kld_sx)
+#define KLD_DOWNGRADE() sx_downgrade(&kld_sx)
+#define KLD_LOCK_READ() sx_slock(&kld_sx)
+#define KLD_UNLOCK_READ() sx_sunlock(&kld_sx)
#define KLD_LOCKED() sx_xlocked(&kld_sx)
#define KLD_LOCK_ASSERT() do { \
if (!cold) \
@@ -389,7 +392,7 @@ linker_load_file(const char *filename, linker_file_t *result)
{
linker_class_t lc;
linker_file_t lf;
- int foundfile, error;
+ int foundfile, error, modules;
/* Refuse to load modules if securelevel raised */
if (prison0.pr_securelevel > 0)
@@ -428,11 +431,22 @@ linker_load_file(const char *filename, linker_file_t *result)
linker_file_unload(lf, LINKER_UNLOAD_FORCE);
return (error);
}
+ modules = !TAILQ_EMPTY(&lf->modules);
KLD_UNLOCK();
linker_file_register_sysctls(lf);
linker_file_sysinit(lf);
KLD_LOCK();
lf->flags |= LINKER_FILE_LINKED;
+
+ /*
+ * If all of the modules in this file failed
+ * to load, unload the file and return an
+ * error of ENOEXEC.
+ */
+ if (modules && TAILQ_EMPTY(&lf->modules)) {
+ linker_file_unload(lf, LINKER_UNLOAD_FORCE);
+ return (ENOEXEC);
+ }
*result = lf;
return (0);
}
@@ -636,7 +650,7 @@ linker_file_unload(linker_file_t file, int flags)
/*
* Inform any modules associated with this file that they are
- * being be unloaded.
+ * being unloaded.
*/
MOD_XLOCK;
for (mod = TAILQ_FIRST(&file->modules); mod; mod = next) {
@@ -647,8 +661,12 @@ linker_file_unload(linker_file_t file, int flags)
* Give the module a chance to veto the unload.
*/
if ((error = module_unload(mod)) != 0) {
+#ifdef KLD_DEBUG
+ MOD_SLOCK;
KLD_DPF(FILE, ("linker_file_unload: module %s"
- " failed unload\n", mod));
+ " failed unload\n", module_getname(mod)));
+ MOD_SUNLOCK;
+#endif
return (error);
}
MOD_XLOCK;
@@ -1030,18 +1048,24 @@ kern_kldload(struct thread *td, const char *file, int *fileid)
KLD_LOCK();
error = linker_load_module(kldname, modname, NULL, NULL, &lf);
- if (error)
- goto unlock;
+ if (error) {
+ KLD_UNLOCK();
+ goto done;
+ }
+ lf->userrefs++;
+ if (fileid != NULL)
+ *fileid = lf->id;
#ifdef HWPMC_HOOKS
+ KLD_DOWNGRADE();
pkm.pm_file = lf->filename;
pkm.pm_address = (uintptr_t) lf->address;
PMC_CALL_HOOK(td, PMC_FN_KLD_LOAD, (void *) &pkm);
-#endif
- lf->userrefs++;
- if (fileid != NULL)
- *fileid = lf->id;
-unlock:
+ KLD_UNLOCK_READ();
+#else
KLD_UNLOCK();
+#endif
+
+done:
CURVNET_RESTORE();
return (error);
}
@@ -1113,10 +1137,15 @@ kern_kldunload(struct thread *td, int fileid, int flags)
error = ENOENT;
#ifdef HWPMC_HOOKS
- if (error == 0)
+ if (error == 0) {
+ KLD_DOWNGRADE();
PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm);
-#endif
+ KLD_UNLOCK_READ();
+ } else
+ KLD_UNLOCK();
+#else
KLD_UNLOCK();
+#endif
CURVNET_RESTORE();
return (error);
}
@@ -1212,29 +1241,39 @@ int
kldstat(struct thread *td, struct kldstat_args *uap)
{
struct kld_file_stat stat;
- linker_file_t lf;
- int error, namelen, version, version_num;
+ int error, version;
/*
* Check the version of the user's structure.
*/
- if ((error = copyin(&uap->stat->version, &version, sizeof(version))) != 0)
+ if ((error = copyin(&uap->stat->version, &version, sizeof(version)))
+ != 0)
return (error);
- if (version == sizeof(struct kld_file_stat_1))
- version_num = 1;
- else if (version == sizeof(struct kld_file_stat))
- version_num = 2;
- else
+ if (version != sizeof(struct kld_file_stat_1) &&
+ version != sizeof(struct kld_file_stat))
return (EINVAL);
+ error = kern_kldstat(td, uap->fileid, &stat);
+ if (error != 0)
+ return (error);
+ return (copyout(&stat, uap->stat, version));
+}
+
+int
+kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat)
+{
+ linker_file_t lf;
+ int namelen;
#ifdef MAC
+ int error;
+
error = mac_kld_check_stat(td->td_ucred);
if (error)
return (error);
#endif
KLD_LOCK();
- lf = linker_find_file_by_id(uap->fileid);
+ lf = linker_find_file_by_id(fileid);
if (lf == NULL) {
KLD_UNLOCK();
return (ENOENT);
@@ -1244,23 +1283,20 @@ kldstat(struct thread *td, struct kldstat_args *uap)
namelen = strlen(lf->filename) + 1;
if (namelen > MAXPATHLEN)
namelen = MAXPATHLEN;
- bcopy(lf->filename, &stat.name[0], namelen);
- stat.refs = lf->refs;
- stat.id = lf->id;
- stat.address = lf->address;
- stat.size = lf->size;
- if (version_num > 1) {
- /* Version 2 fields: */
- namelen = strlen(lf->pathname) + 1;
- if (namelen > MAXPATHLEN)
- namelen = MAXPATHLEN;
- bcopy(lf->pathname, &stat.pathname[0], namelen);
- }
+ bcopy(lf->filename, &stat->name[0], namelen);
+ stat->refs = lf->refs;
+ stat->id = lf->id;
+ stat->address = lf->address;
+ stat->size = lf->size;
+ /* Version 2 fields: */
+ namelen = strlen(lf->pathname) + 1;
+ if (namelen > MAXPATHLEN)
+ namelen = MAXPATHLEN;
+ bcopy(lf->pathname, &stat->pathname[0], namelen);
KLD_UNLOCK();
td->td_retval[0] = 0;
-
- return (copyout(&stat, uap->stat, version));
+ return (0);
}
int
@@ -1928,7 +1964,7 @@ linker_hwpmc_list_objects(void)
int i, nmappings;
nmappings = 0;
- KLD_LOCK();
+ KLD_LOCK_READ();
TAILQ_FOREACH(lf, &linker_files, link)
nmappings++;
@@ -1943,7 +1979,7 @@ linker_hwpmc_list_objects(void)
kobase[i].pm_address = (uintptr_t)lf->address;
i++;
}
- KLD_UNLOCK();
+ KLD_UNLOCK_READ();
KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?"));
diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c
index 25058a79..c513463d 100644
--- a/freebsd/sys/kern/kern_mib.c
+++ b/freebsd/sys/kern/kern_mib.c
@@ -108,6 +108,9 @@ SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD,
SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE,
version, 0, "Kernel version");
+SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE,
+ compiler_version, 0, "Version of compiler used to compile kernel");
+
SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE,
ostype, 0, "Operating system type");
diff --git a/freebsd/sys/kern/kern_osd.c b/freebsd/sys/kern/kern_osd.c
new file mode 100644
index 00000000..167607e0
--- /dev/null
+++ b/freebsd/sys/kern/kern_osd.c
@@ -0,0 +1,405 @@
+#include <machine/rtems-bsd-config.h>
+
+/*-
+ * Copyright (c) 2007 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <rtems/bsd/sys/param.h>
+#include <sys/kernel.h>
+#include <sys/systm.h>
+#include <sys/sysctl.h>
+#include <rtems/bsd/sys/errno.h>
+#include <sys/jail.h>
+#include <sys/malloc.h>
+#include <rtems/bsd/sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/rmlock.h>
+#include <sys/sx.h>
+#include <sys/queue.h>
+#include <sys/proc.h>
+#include <sys/osd.h>
+
+/* OSD (Object Specific Data) */
+
+static MALLOC_DEFINE(M_OSD, "osd", "Object Specific Data");
+
+static int osd_debug = 0;
+TUNABLE_INT("debug.osd", &osd_debug);
+SYSCTL_INT(_debug, OID_AUTO, osd, CTLFLAG_RW, &osd_debug, 0, "OSD debug level");
+
+#define OSD_DEBUG(...) do { \
+ if (osd_debug) { \
+ printf("OSD (%s:%u): ", __func__, __LINE__); \
+ printf(__VA_ARGS__); \
+ printf("\n"); \
+ } \
+} while (0)
+
+static void do_osd_del(u_int type, struct osd *osd, u_int slot,
+ int list_locked);
+
+/*
+ * Lists of objects with OSD.
+ *
+ * Lock key:
+ * (m) osd_module_lock
+ * (o) osd_object_lock
+ * (l) osd_list_lock
+ */
+static LIST_HEAD(, osd) osd_list[OSD_LAST + 1]; /* (m) */
+static osd_method_t *osd_methods[OSD_LAST + 1]; /* (m) */
+static u_int osd_nslots[OSD_LAST + 1]; /* (m) */
+static osd_destructor_t *osd_destructors[OSD_LAST + 1]; /* (o) */
+static const u_int osd_nmethods[OSD_LAST + 1] = {
+ [OSD_JAIL] = PR_MAXMETHOD,
+};
+
+static struct sx osd_module_lock[OSD_LAST + 1];
+static struct rmlock osd_object_lock[OSD_LAST + 1];
+static struct mtx osd_list_lock[OSD_LAST + 1];
+
+static void
+osd_default_destructor(void *value __unused)
+{
+ /* Do nothing. */
+}
+
+int
+osd_register(u_int type, osd_destructor_t destructor, osd_method_t *methods)
+{
+ void *newptr;
+ u_int i, m;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+
+ /*
+ * If no destructor is given, use default one. We need to use some
+ * destructor, because NULL destructor means unused slot.
+ */
+ if (destructor == NULL)
+ destructor = osd_default_destructor;
+
+ sx_xlock(&osd_module_lock[type]);
+ /*
+ * First, we try to find unused slot.
+ */
+ for (i = 0; i < osd_nslots[type]; i++) {
+ if (osd_destructors[type][i] == NULL) {
+ OSD_DEBUG("Unused slot found (type=%u, slot=%u).",
+ type, i);
+ break;
+ }
+ }
+ /*
+ * If no unused slot was found, allocate one.
+ */
+ if (i == osd_nslots[type]) {
+ osd_nslots[type]++;
+ if (osd_nmethods[type] != 0)
+ osd_methods[type] = realloc(osd_methods[type],
+ sizeof(osd_method_t) * osd_nslots[type] *
+ osd_nmethods[type], M_OSD, M_WAITOK);
+ newptr = malloc(sizeof(osd_destructor_t) * osd_nslots[type],
+ M_OSD, M_WAITOK);
+ rm_wlock(&osd_object_lock[type]);
+ bcopy(osd_destructors[type], newptr,
+ sizeof(osd_destructor_t) * i);
+ free(osd_destructors[type], M_OSD);
+ osd_destructors[type] = newptr;
+ rm_wunlock(&osd_object_lock[type]);
+ OSD_DEBUG("New slot allocated (type=%u, slot=%u).",
+ type, i + 1);
+ }
+
+ osd_destructors[type][i] = destructor;
+ if (osd_nmethods[type] != 0) {
+ for (m = 0; m < osd_nmethods[type]; m++)
+ osd_methods[type][i * osd_nmethods[type] + m] =
+ methods != NULL ? methods[m] : NULL;
+ }
+ sx_xunlock(&osd_module_lock[type]);
+ return (i + 1);
+}
+
+void
+osd_deregister(u_int type, u_int slot)
+{
+ struct osd *osd, *tosd;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ sx_xlock(&osd_module_lock[type]);
+ rm_wlock(&osd_object_lock[type]);
+ /*
+ * Free all OSD for the given slot.
+ */
+ mtx_lock(&osd_list_lock[type]);
+ LIST_FOREACH_SAFE(osd, &osd_list[type], osd_next, tosd)
+ do_osd_del(type, osd, slot, 1);
+ mtx_unlock(&osd_list_lock[type]);
+ /*
+ * Set destructor to NULL to free the slot.
+ */
+ osd_destructors[type][slot - 1] = NULL;
+ if (slot == osd_nslots[type]) {
+ osd_nslots[type]--;
+ osd_destructors[type] = realloc(osd_destructors[type],
+ sizeof(osd_destructor_t) * osd_nslots[type], M_OSD,
+ M_NOWAIT | M_ZERO);
+ if (osd_nmethods[type] != 0)
+ osd_methods[type] = realloc(osd_methods[type],
+ sizeof(osd_method_t) * osd_nslots[type] *
+ osd_nmethods[type], M_OSD, M_NOWAIT | M_ZERO);
+ /*
+ * We always reallocate to smaller size, so we assume it will
+ * always succeed.
+ */
+ KASSERT(osd_destructors[type] != NULL &&
+ (osd_nmethods[type] == 0 || osd_methods[type] != NULL),
+ ("realloc() failed"));
+ OSD_DEBUG("Deregistration of the last slot (type=%u, slot=%u).",
+ type, slot);
+ } else {
+ OSD_DEBUG("Slot deregistration (type=%u, slot=%u).",
+ type, slot);
+ }
+ rm_wunlock(&osd_object_lock[type]);
+ sx_xunlock(&osd_module_lock[type]);
+}
+
+int
+osd_set(u_int type, struct osd *osd, u_int slot, void *value)
+{
+ struct rm_priotracker tracker;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ rm_rlock(&osd_object_lock[type], &tracker);
+ if (slot > osd->osd_nslots) {
+ if (value == NULL) {
+ OSD_DEBUG(
+ "Not allocating null slot (type=%u, slot=%u).",
+ type, slot);
+ rm_runlock(&osd_object_lock[type], &tracker);
+ return (0);
+ } else if (osd->osd_nslots == 0) {
+ /*
+ * First OSD for this object, so we need to allocate
+ * space and put it onto the list.
+ */
+ osd->osd_slots = malloc(sizeof(void *) * slot, M_OSD,
+ M_NOWAIT | M_ZERO);
+ if (osd->osd_slots == NULL) {
+ rm_runlock(&osd_object_lock[type], &tracker);
+ return (ENOMEM);
+ }
+ osd->osd_nslots = slot;
+ mtx_lock(&osd_list_lock[type]);
+ LIST_INSERT_HEAD(&osd_list[type], osd, osd_next);
+ mtx_unlock(&osd_list_lock[type]);
+ OSD_DEBUG("Setting first slot (type=%u).", type);
+ } else {
+ void *newptr;
+
+ /*
+ * Too few slots allocated here, needs to extend
+ * the array.
+ */
+ newptr = realloc(osd->osd_slots, sizeof(void *) * slot,
+ M_OSD, M_NOWAIT | M_ZERO);
+ if (newptr == NULL) {
+ rm_runlock(&osd_object_lock[type], &tracker);
+ return (ENOMEM);
+ }
+ osd->osd_slots = newptr;
+ osd->osd_nslots = slot;
+ OSD_DEBUG("Growing slots array (type=%u).", type);
+ }
+ }
+ OSD_DEBUG("Setting slot value (type=%u, slot=%u, value=%p).", type,
+ slot, value);
+ osd->osd_slots[slot - 1] = value;
+ rm_runlock(&osd_object_lock[type], &tracker);
+ return (0);
+}
+
+void *
+osd_get(u_int type, struct osd *osd, u_int slot)
+{
+ struct rm_priotracker tracker;
+ void *value;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ rm_rlock(&osd_object_lock[type], &tracker);
+ if (slot > osd->osd_nslots) {
+ value = NULL;
+ OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
+ } else {
+ value = osd->osd_slots[slot - 1];
+ OSD_DEBUG("Returning slot value (type=%u, slot=%u, value=%p).",
+ type, slot, value);
+ }
+ rm_runlock(&osd_object_lock[type], &tracker);
+ return (value);
+}
+
+void
+osd_del(u_int type, struct osd *osd, u_int slot)
+{
+ struct rm_priotracker tracker;
+
+ rm_rlock(&osd_object_lock[type], &tracker);
+ do_osd_del(type, osd, slot, 0);
+ rm_runlock(&osd_object_lock[type], &tracker);
+}
+
+static void
+do_osd_del(u_int type, struct osd *osd, u_int slot, int list_locked)
+{
+ int i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(slot > 0, ("Invalid slot."));
+ KASSERT(osd_destructors[type][slot - 1] != NULL, ("Unused slot."));
+
+ OSD_DEBUG("Deleting slot (type=%u, slot=%u).", type, slot);
+
+ if (slot > osd->osd_nslots) {
+ OSD_DEBUG("Slot doesn't exist (type=%u, slot=%u).", type, slot);
+ return;
+ }
+ if (osd->osd_slots[slot - 1] != NULL) {
+ osd_destructors[type][slot - 1](osd->osd_slots[slot - 1]);
+ osd->osd_slots[slot - 1] = NULL;
+ }
+ for (i = osd->osd_nslots - 1; i >= 0; i--) {
+ if (osd->osd_slots[i] != NULL) {
+ OSD_DEBUG("Slot still has a value (type=%u, slot=%u).",
+ type, i + 1);
+ break;
+ }
+ }
+ if (i == -1) {
+ /* No values left for this object. */
+ OSD_DEBUG("No more slots left (type=%u).", type);
+ if (!list_locked)
+ mtx_lock(&osd_list_lock[type]);
+ LIST_REMOVE(osd, osd_next);
+ if (!list_locked)
+ mtx_unlock(&osd_list_lock[type]);
+ free(osd->osd_slots, M_OSD);
+ osd->osd_slots = NULL;
+ osd->osd_nslots = 0;
+ } else if (slot == osd->osd_nslots) {
+ /* This was the last slot. */
+ osd->osd_slots = realloc(osd->osd_slots,
+ sizeof(void *) * (i + 1), M_OSD, M_NOWAIT | M_ZERO);
+ /*
+ * We always reallocate to smaller size, so we assume it will
+ * always succeed.
+ */
+ KASSERT(osd->osd_slots != NULL, ("realloc() failed"));
+ osd->osd_nslots = i + 1;
+ OSD_DEBUG("Reducing slots array to %u (type=%u).",
+ osd->osd_nslots, type);
+ }
+}
+
+int
+osd_call(u_int type, u_int method, void *obj, void *data)
+{
+ osd_method_t methodfun;
+ int error, i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+ KASSERT(method < osd_nmethods[type], ("Invalid method."));
+
+ /*
+ * Call this method for every slot that defines it, stopping if an
+ * error is encountered.
+ */
+ error = 0;
+ sx_slock(&osd_module_lock[type]);
+ for (i = 0; i < osd_nslots[type]; i++) {
+ methodfun =
+ osd_methods[type][i * osd_nmethods[type] + method];
+ if (methodfun != NULL && (error = methodfun(obj, data)) != 0)
+ break;
+ }
+ sx_sunlock(&osd_module_lock[type]);
+ return (error);
+}
+
+void
+osd_exit(u_int type, struct osd *osd)
+{
+ struct rm_priotracker tracker;
+ u_int i;
+
+ KASSERT(type >= OSD_FIRST && type <= OSD_LAST, ("Invalid type."));
+
+ if (osd->osd_nslots == 0) {
+ KASSERT(osd->osd_slots == NULL, ("Non-null osd_slots."));
+ /* No OSD attached, just leave. */
+ return;
+ }
+
+ rm_rlock(&osd_object_lock[type], &tracker);
+ for (i = 1; i <= osd->osd_nslots; i++) {
+ if (osd_destructors[type][i - 1] != NULL)
+ do_osd_del(type, osd, i, 0);
+ else
+ OSD_DEBUG("Unused slot (type=%u, slot=%u).", type, i);
+ }
+ rm_runlock(&osd_object_lock[type], &tracker);
+ OSD_DEBUG("Object exit (type=%u).", type);
+}
+
+static void
+osd_init(void *arg __unused)
+{
+ u_int i;
+
+ for (i = OSD_FIRST; i <= OSD_LAST; i++) {
+ osd_nslots[i] = 0;
+ LIST_INIT(&osd_list[i]);
+ sx_init(&osd_module_lock[i], "osd_module");
+ rm_init(&osd_object_lock[i], "osd_object");
+ mtx_init(&osd_list_lock[i], "osd_list", NULL, MTX_DEF);
+ osd_destructors[i] = NULL;
+ osd_methods[i] = NULL;
+ }
+}
+SYSINIT(osd, SI_SUB_LOCK, SI_ORDER_ANY, osd_init, NULL);
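kern_osd.c provides the Object Specific Data store the Khelp code relies on: a subsystem registers a slot once (optionally with a destructor and, for OSD_JAIL, per-slot methods) and then attaches, looks up, and deletes per-object data through that slot. A minimal sketch of the calling pattern; OSD_KHELP is reused here only because it appears in this diff, and the payload and names are hypothetical:

#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/osd.h>

static u_int example_osd_slot;

static void
example_osd_dtor(void *value)
{
	/* Invoked for any object still carrying our data when it is deleted. */
	free(value, M_TEMP);
}

static void
example_osd_setup(void)
{
	/* Reserve a slot; no methods are defined for this type. */
	example_osd_slot = osd_register(OSD_KHELP, example_osd_dtor, NULL);
}

static int
example_osd_attach(struct osd *obj)
{
	int *data, error;

	data = malloc(sizeof(*data), M_TEMP, M_NOWAIT | M_ZERO);
	if (data == NULL)
		return (ENOMEM);
	/* osd_set() grows the object's slot array on demand. */
	error = osd_set(OSD_KHELP, obj, example_osd_slot, data);
	if (error != 0)
		free(data, M_TEMP);
	return (error);
}

static int *
example_osd_lookup(struct osd *obj)
{
	return (osd_get(OSD_KHELP, obj, example_osd_slot));
}

static void
example_osd_detach(struct osd *obj)
{
	/* Runs the destructor and shrinks or frees the object's slot array. */
	osd_del(OSD_KHELP, obj, example_osd_slot);
}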
diff --git a/freebsd/sys/kern/kern_subr.c b/freebsd/sys/kern/kern_subr.c
index ec671200..0cbc75b9 100644
--- a/freebsd/sys/kern/kern_subr.c
+++ b/freebsd/sys/kern/kern_subr.c
@@ -47,6 +47,7 @@ __FBSDID("$FreeBSD$");
#include <sys/ktr.h>
#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
+#include <sys/mman.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/malloc.h>
@@ -56,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/vnode.h>
#include <vm/vm.h>
+#include <vm/vm_extern.h>
#include <vm/vm_page.h>
#include <vm/vm_map.h>
#include <sys/uio.h>
@@ -67,7 +69,11 @@ __FBSDID("$FreeBSD$");
#ifndef __rtems__
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
"Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
+#endif /* __rtems__ */
+
+static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
+#ifndef __rtems__
#ifdef ZERO_COPY_SOCKETS
/* Declared in uipc_socket.c */
extern int so_zero_copy_receive;
@@ -132,31 +138,75 @@ retry:
return(KERN_SUCCESS);
}
#endif /* ZERO_COPY_SOCKETS */
+
+int
+copyin_nofault(const void *udaddr, void *kaddr, size_t len)
+{
+ int error, save;
+
+ save = vm_fault_disable_pagefaults();
+ error = copyin(udaddr, kaddr, len);
+ vm_fault_enable_pagefaults(save);
+ return (error);
+}
+
+int
+copyout_nofault(const void *kaddr, void *udaddr, size_t len)
+{
+ int error, save;
+
+ save = vm_fault_disable_pagefaults();
+ error = copyout(kaddr, udaddr, len);
+ vm_fault_enable_pagefaults(save);
+ return (error);
+}
#endif /* __rtems__ */
int
uiomove(void *cp, int n, struct uio *uio)
{
+
+ return (uiomove_faultflag(cp, n, uio, 0));
+}
+
+int
+uiomove_nofault(void *cp, int n, struct uio *uio)
+{
+
+ return (uiomove_faultflag(cp, n, uio, 1));
+}
+
+static int
+uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
+{
#ifndef __rtems__
- struct thread *td = curthread;
+ struct thread *td;
#endif /* __rtems__ */
struct iovec *iov;
u_int cnt;
- int error = 0;
-#ifndef __rtems__
- int save = 0;
-#endif /* __rtems__ */
+ int error, newflags, save;
KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
("uiomove: mode"));
- KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
+ KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
("uiomove proc"));
- WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
- "Calling uiomove()");
+ if (!nofault)
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
+ "Calling uiomove()");
#ifndef __rtems__
- save = td->td_pflags & TDP_DEADLKTREAT;
- td->td_pflags |= TDP_DEADLKTREAT;
+ /* XXX does it make sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
+ newflags = TDP_DEADLKTREAT;
+ if (uio->uio_segflg == UIO_USERSPACE && nofault) {
+ /*
+ * Fail if a non-spurious page fault occurs.
+ */
+ newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
+ }
+ save = curthread_pflags_set(newflags);
+#else /* __rtems__ */
+ (void) newflags;
+ (void) save;
#endif /* __rtems__ */
while (n > 0 && uio->uio_resid) {
@@ -203,8 +253,7 @@ uiomove(void *cp, int n, struct uio *uio)
}
out:
#ifndef __rtems__
- if (save == 0)
- td->td_pflags &= ~TDP_DEADLKTREAT;
+ curthread_pflags_restore(save);
#endif /* __rtems__ */
return (error);
}
@@ -388,9 +437,7 @@ hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask,
LIST_HEAD(generic, generic) *hashtbl;
int i;
- if (elements <= 0)
- panic("hashinit: bad elements");
-
+ KASSERT(elements > 0, ("%s: bad elements", __func__));
/* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */
KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT),
("Bad flags (0x%x) passed to hashinit_flags", flags));
@@ -431,8 +478,7 @@ hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask)
hashtbl = vhashtbl;
for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++)
- if (!LIST_EMPTY(hp))
- panic("hashdestroy: hash not empty");
+ KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__));
free(hashtbl, type);
}
@@ -451,8 +497,7 @@ phashinit(int elements, struct malloc_type *type, u_long *nentries)
LIST_HEAD(generic, generic) *hashtbl;
int i;
- if (elements <= 0)
- panic("phashinit: bad elements");
+ KASSERT(elements > 0, ("%s: bad elements", __func__));
for (i = 1, hashsize = primes[1]; hashsize <= elements;) {
i++;
if (i == NPRIMES)
@@ -471,16 +516,8 @@ phashinit(int elements, struct malloc_type *type, u_long *nentries)
void
uio_yield(void)
{
- struct thread *td;
- td = curthread;
- DROP_GIANT();
- thread_lock(td);
- sched_prio(td, td->td_user_pri);
- mi_switch(SW_INVOL | SWT_RELINQUISH, NULL);
- thread_unlock(td);
- rtems_task_wake_after(RTEMS_YIELD_PROCESSOR);
- PICKUP_GIANT();
+ kern_yield(PRI_USER);
}
int
@@ -591,4 +628,55 @@ cloneuio(struct uio *uiop)
bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
return (uio);
}
+
+/*
+ * Map some anonymous memory in user space of size sz, rounded up to the page
+ * boundary.
+ */
+int
+copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
+{
+ struct vmspace *vms;
+ int error;
+ vm_size_t size;
+
+ vms = td->td_proc->p_vmspace;
+
+ /*
+ * Map somewhere after heap in process memory.
+ */
+ PROC_LOCK(td->td_proc);
+ *addr = round_page((vm_offset_t)vms->vm_daddr +
+ lim_max(td->td_proc, RLIMIT_DATA));
+ PROC_UNLOCK(td->td_proc);
+
+ /* round size up to page boundary */
+ size = (vm_size_t)round_page(sz);
+
+ error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
+ VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
+
+ return (error);
+}
+
+/*
+ * Unmap memory in user space.
+ */
+int
+copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
+{
+ vm_map_t map;
+ vm_size_t size;
+
+ if (sz == 0)
+ return (0);
+
+ map = &td->td_proc->p_vmspace->vm_map;
+ size = (vm_size_t)round_page(sz);
+
+ if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
+ return (EINVAL);
+
+ return (0);
+}
#endif /* __rtems__ */
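copyin_nofault() and copyout_nofault() above bracket the ordinary copy routines with vm_fault_disable_pagefaults()/vm_fault_enable_pagefaults(), and uiomove_nofault() sets TDP_NOFAULTING | TDP_RESETSPUR for the same effect, so a caller that must not sleep (for example while holding a non-sleepable mutex) gets an error instead of a page-fault sleep. A rough sketch of the intended calling pattern; the softc, mutex, and function name are hypothetical:

#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>

struct example_softc {
	struct mtx	sc_mtx;
	uint32_t	sc_flags;
};

static int
example_fetch_flags(struct example_softc *sc, const void *uptr)
{
	uint32_t value;
	int error;

	mtx_lock(&sc->sc_mtx);		/* non-sleepable lock: copies must not fault */
	error = copyin_nofault(uptr, &value, sizeof(value));
	if (error == 0)
		sc->sc_flags = value;
	mtx_unlock(&sc->sc_mtx);

	if (error != 0) {
		/* Retry with the faulting copy once the lock is dropped. */
		error = copyin(uptr, &value, sizeof(value));
		if (error == 0) {
			mtx_lock(&sc->sc_mtx);
			sc->sc_flags = value;
			mtx_unlock(&sc->sc_mtx);
		}
	}
	return (error);
}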
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index 750de376..fad7ec10 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <rtems/bsd/local/opt_ktrace.h>
#include <rtems/bsd/sys/param.h>
+#include <sys/fail.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
@@ -53,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <sys/jail.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/mutex.h>
+#include <sys/sbuf.h>
#include <sys/sx.h>
#include <sys/sysproto.h>
#include <sys/uio.h>
@@ -87,13 +89,12 @@ static MALLOC_DEFINE(M_SYSCTLTMP, "sysctltmp", "sysctl temp output buffer");
static struct sx sysctllock;
static struct sx sysctlmemlock;
-#define SYSCTL_SLOCK() sx_slock(&sysctllock)
-#define SYSCTL_SUNLOCK() sx_sunlock(&sysctllock)
#define SYSCTL_XLOCK() sx_xlock(&sysctllock)
#define SYSCTL_XUNLOCK() sx_xunlock(&sysctllock)
#define SYSCTL_ASSERT_XLOCKED() sx_assert(&sysctllock, SA_XLOCKED)
-#define SYSCTL_ASSERT_LOCKED() sx_assert(&sysctllock, SA_LOCKED)
#define SYSCTL_INIT() sx_init(&sysctllock, "sysctl lock")
+#define SYSCTL_SLEEP(ch, wmesg, timo) \
+ sx_sleep(ch, &sysctllock, 0, wmesg, timo)
static int sysctl_root(SYSCTL_HANDLER_ARGS);
@@ -107,7 +108,7 @@ sysctl_find_oidname(const char *name, struct sysctl_oid_list *list)
{
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_ASSERT_XLOCKED();
SLIST_FOREACH(oidp, list, oid_link) {
if (strcmp(oidp->oid_name, name) == 0) {
return (oidp);
@@ -314,7 +315,7 @@ sysctl_ctx_entry_find(struct sysctl_ctx_list *clist, struct sysctl_oid *oidp)
{
struct sysctl_ctx_entry *e;
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_ASSERT_XLOCKED();
if (clist == NULL || oidp == NULL)
return(NULL);
TAILQ_FOREACH(e, clist, link) {
@@ -410,10 +411,20 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse)
}
sysctl_unregister_oid(oidp);
if (del) {
+ /*
+ * Wait for all threads running the handler to drain.
+ * This preserves the previous behavior when the
+ * sysctl lock was held across a handler invocation,
+ * and is necessary for module unload correctness.
+ */
+ while (oidp->oid_running > 0) {
+ oidp->oid_kind |= CTLFLAG_DYING;
+ SYSCTL_SLEEP(&oidp->oid_running, "oidrm", 0);
+ }
if (oidp->oid_descr)
- free((void *)(uintptr_t)(const void *)oidp->oid_descr, M_SYSCTLOID);
- free((void *)(uintptr_t)(const void *)oidp->oid_name,
- M_SYSCTLOID);
+ free(__DECONST(char *, oidp->oid_descr),
+ M_SYSCTLOID);
+ free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID);
free(oidp, M_SYSCTLOID);
}
}
@@ -430,8 +441,6 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr)
{
struct sysctl_oid *oidp;
- ssize_t len;
- char *newname;
/* You have to hook up somewhere.. */
if (parent == NULL)
@@ -458,11 +467,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
SLIST_NEXT(oidp, oid_link) = NULL;
oidp->oid_number = number;
oidp->oid_refcnt = 1;
- len = strlen(name);
- newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK);
- bcopy(name, newname, len + 1);
- newname[len] = '\0';
- oidp->oid_name = newname;
+ oidp->oid_name = strdup(name, M_SYSCTLOID);
oidp->oid_handler = handler;
oidp->oid_kind = CTLFLAG_DYN | kind;
if ((kind & CTLTYPE) == CTLTYPE_NODE) {
@@ -475,12 +480,8 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
oidp->oid_arg2 = arg2;
}
oidp->oid_fmt = fmt;
- if (descr) {
- int len = strlen(descr) + 1;
- oidp->oid_descr = malloc(len, M_SYSCTLOID, M_WAITOK);
- if (oidp->oid_descr)
- strcpy((char *)(uintptr_t)(const void *)oidp->oid_descr, descr);
- }
+ if (descr)
+ oidp->oid_descr = strdup(descr, M_SYSCTLOID);
/* Update the context, if used */
if (clist != NULL)
sysctl_ctx_entry_add(clist, oidp);
@@ -496,16 +497,12 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent,
void
sysctl_rename_oid(struct sysctl_oid *oidp, const char *name)
{
- ssize_t len;
char *newname;
- void *oldname;
+ char *oldname;
- len = strlen(name);
- newname = malloc(len + 1, M_SYSCTLOID, M_WAITOK);
- bcopy(name, newname, len + 1);
- newname[len] = '\0';
+ newname = strdup(name, M_SYSCTLOID);
SYSCTL_XLOCK();
- oldname = (void *)(uintptr_t)(const void *)oidp->oid_name;
+ oldname = __DECONST(char *, oidp->oid_name);
oidp->oid_name = newname;
SYSCTL_XUNLOCK();
free(oldname, M_SYSCTLOID);
@@ -582,7 +579,7 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i)
int k;
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_ASSERT_XLOCKED();
SLIST_FOREACH(oidp, l, oid_link) {
for (k=0; k<i; k++)
@@ -623,7 +620,9 @@ sysctl_sysctl_debug(SYSCTL_HANDLER_ARGS)
error = priv_check(req->td, PRIV_SYSCTL_DEBUG);
if (error)
return (error);
+ SYSCTL_XLOCK();
sysctl_sysctl_debug_dump_node(&sysctl__children, 0);
+ SYSCTL_XUNLOCK();
return (ENOENT);
}
@@ -641,7 +640,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
struct sysctl_oid_list *lsp = &sysctl__children, *lsp2;
char buf[10];
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_XLOCK();
while (namelen) {
if (!lsp) {
snprintf(buf,sizeof(buf),"%d",*name);
@@ -650,7 +649,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
if (!error)
error = SYSCTL_OUT(req, buf, strlen(buf));
if (error)
- return (error);
+ goto out;
namelen--;
name++;
continue;
@@ -666,7 +665,7 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
error = SYSCTL_OUT(req, oid->oid_name,
strlen(oid->oid_name));
if (error)
- return (error);
+ goto out;
namelen--;
name++;
@@ -677,12 +676,15 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS)
if (oid->oid_handler)
break;
- lsp2 = (struct sysctl_oid_list *)oid->oid_arg1;
+ lsp2 = SYSCTL_CHILDREN(oid);
break;
}
lsp = lsp2;
}
- return (SYSCTL_OUT(req, "", 1));
+ error = SYSCTL_OUT(req, "", 1);
+ out:
+ SYSCTL_XUNLOCK();
+ return (error);
}
static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, "");
@@ -693,7 +695,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
{
struct sysctl_oid *oidp;
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_ASSERT_XLOCKED();
*len = level;
SLIST_FOREACH(oidp, lsp, oid_link) {
*next = oidp->oid_number;
@@ -708,7 +710,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
if (oidp->oid_handler)
/* We really should call the handler here...*/
return (0);
- lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
+ lsp = SYSCTL_CHILDREN(oidp);
if (!sysctl_sysctl_next_ls(lsp, 0, 0, next+1,
len, level+1, oidpp))
return (0);
@@ -723,7 +725,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
return (0);
if (oidp->oid_handler)
return (0);
- lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
+ lsp = SYSCTL_CHILDREN(oidp);
if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1,
next+1, len, level+1, oidpp))
return (0);
@@ -735,7 +737,7 @@ sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen,
if (oidp->oid_handler)
continue;
- lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
+ lsp = SYSCTL_CHILDREN(oidp);
if (!sysctl_sysctl_next_ls(lsp, name+1, namelen-1, next+1,
len, level+1, oidpp))
return (0);
@@ -757,7 +759,9 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS)
struct sysctl_oid_list *lsp = &sysctl__children;
int newoid[CTL_MAXNAME];
+ SYSCTL_XLOCK();
i = sysctl_sysctl_next_ls(lsp, name, namelen, newoid, &j, 1, &oid);
+ SYSCTL_XUNLOCK();
if (i)
return (ENOENT);
error = SYSCTL_OUT(req, newoid, j * sizeof (int));
@@ -769,39 +773,26 @@ static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, "");
static int
name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
{
- int i;
struct sysctl_oid *oidp;
struct sysctl_oid_list *lsp = &sysctl__children;
char *p;
- SYSCTL_ASSERT_LOCKED();
-
- if (!*name)
- return (ENOENT);
-
- p = name + strlen(name) - 1 ;
- if (*p == '.')
- *p = '\0';
-
- *len = 0;
-
- for (p = name; *p && *p != '.'; p++)
- ;
- i = *p;
- if (i == '.')
- *p = '\0';
+ SYSCTL_ASSERT_XLOCKED();
- oidp = SLIST_FIRST(lsp);
+ for (*len = 0; *len < CTL_MAXNAME;) {
+ p = strsep(&name, ".");
- while (oidp && *len < CTL_MAXNAME) {
- if (strcmp(name, oidp->oid_name)) {
- oidp = SLIST_NEXT(oidp, oid_link);
- continue;
+ oidp = SLIST_FIRST(lsp);
+ for (;; oidp = SLIST_NEXT(oidp, oid_link)) {
+ if (oidp == NULL)
+ return (ENOENT);
+ if (strcmp(p, oidp->oid_name) == 0)
+ break;
}
*oid++ = oidp->oid_number;
(*len)++;
- if (!i) {
+ if (name == NULL || *name == '\0') {
if (oidpp)
*oidpp = oidp;
return (0);
@@ -813,14 +804,7 @@ name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp)
if (oidp->oid_handler)
break;
- lsp = (struct sysctl_oid_list *)oidp->oid_arg1;
- oidp = SLIST_FIRST(lsp);
- name = p+1;
- for (p = name; *p && *p != '.'; p++)
- ;
- i = *p;
- if (i == '.')
- *p = '\0';
+ lsp = SYSCTL_CHILDREN(oidp);
}
return (ENOENT);
}
@@ -832,8 +816,6 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
int error, oid[CTL_MAXNAME], len;
struct sysctl_oid *op = 0;
- SYSCTL_ASSERT_LOCKED();
-
if (!req->newlen)
return (ENOENT);
if (req->newlen >= MAXPATHLEN) /* XXX arbitrary, undocumented */
@@ -848,8 +830,10 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS)
}
p [req->newlen] = '\0';
-
+ len = 0;
+ SYSCTL_XLOCK();
error = name2oid(p, oid, &len, &op);
+ SYSCTL_XUNLOCK();
free(p, M_SYSCTL);
@@ -869,16 +853,21 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS)
struct sysctl_oid *oid;
int error;
+ SYSCTL_XLOCK();
error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
if (error)
- return (error);
+ goto out;
- if (!oid->oid_fmt)
- return (ENOENT);
+ if (oid->oid_fmt == NULL) {
+ error = ENOENT;
+ goto out;
+ }
error = SYSCTL_OUT(req, &oid->oid_kind, sizeof(oid->oid_kind));
if (error)
- return (error);
+ goto out;
error = SYSCTL_OUT(req, oid->oid_fmt, strlen(oid->oid_fmt) + 1);
+ out:
+ SYSCTL_XUNLOCK();
return (error);
}
@@ -892,13 +881,18 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS)
struct sysctl_oid *oid;
int error;
+ SYSCTL_XLOCK();
error = sysctl_find_oid(arg1, arg2, &oid, NULL, req);
if (error)
- return (error);
+ goto out;
- if (!oid->oid_descr)
- return (ENOENT);
+ if (oid->oid_descr == NULL) {
+ error = ENOENT;
+ goto out;
+ }
error = SYSCTL_OUT(req, oid->oid_descr, strlen(oid->oid_descr) + 1);
+ out:
+ SYSCTL_XUNLOCK();
return (error);
}
@@ -966,7 +960,10 @@ sysctl_msec_to_ticks(SYSCTL_HANDLER_ARGS)
/*
- * Handle a long, signed or unsigned. arg1 points to it.
+ * Handle a long, signed or unsigned.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
*/
int
@@ -981,9 +978,10 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS)
/*
* Attempt to get a coherent snapshot by making a copy of the data.
*/
- if (!arg1)
- return (EINVAL);
- tmplong = *(long *)arg1;
+ if (arg1)
+ tmplong = *(long *)arg1;
+ else
+ tmplong = arg2;
#ifdef SCTL_MASK32
if (req->flags & SCTL_MASK32) {
tmpint = tmplong;
@@ -995,18 +993,24 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS)
if (error || !req->newptr)
return (error);
+ if (!arg1)
+ error = EPERM;
#ifdef SCTL_MASK32
- if (req->flags & SCTL_MASK32) {
+ else if (req->flags & SCTL_MASK32) {
error = SYSCTL_IN(req, &tmpint, sizeof(int));
*(long *)arg1 = (long)tmpint;
- } else
+ }
#endif
+ else
error = SYSCTL_IN(req, arg1, sizeof(long));
return (error);
}
/*
- * Handle a 64 bit int, signed or unsigned. arg1 points to it.
+ * Handle a 64 bit int, signed or unsigned.
+ * Two cases:
+ * a variable: point arg1 at it.
+ * a constant: pass it in arg2.
*/
int
@@ -1018,15 +1022,19 @@ sysctl_handle_quad(SYSCTL_HANDLER_ARGS)
/*
* Attempt to get a coherent snapshot by making a copy of the data.
*/
- if (!arg1)
- return (EINVAL);
- tmpout = *(uint64_t *)arg1;
+ if (arg1)
+ tmpout = *(uint64_t *)arg1;
+ else
+ tmpout = arg2;
error = SYSCTL_OUT(req, &tmpout, sizeof(uint64_t));
if (error || !req->newptr)
return (error);
- error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
+ if (!arg1)
+ error = EPERM;
+ else
+ error = SYSCTL_IN(req, arg1, sizeof(uint64_t));
return (error);
}
@@ -1192,9 +1200,9 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old,
req.newfunc = sysctl_new_kernel;
req.lock = REQ_LOCKED;
- SYSCTL_SLOCK();
+ SYSCTL_XLOCK();
error = sysctl_root(0, name, namelen, &req);
- SYSCTL_SUNLOCK();
+ SYSCTL_XUNLOCK();
if (req.lock == REQ_WIRED && req.validlen > 0)
vsunlock(req.oldptr, req.validlen);
@@ -1241,8 +1249,8 @@ kernel_sysctlbyname(struct thread *td, char *name, void *old, size_t *oldlenp,
static int
sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
{
- int error = 0;
size_t i, len, origidx;
+ int error;
origidx = req->oldidx;
req->oldidx += l;
@@ -1263,10 +1271,14 @@ sysctl_old_user(struct sysctl_req *req, const void *p, size_t l)
else {
if (i > len - origidx)
i = len - origidx;
- error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (req->lock == REQ_WIRED) {
+ error = copyout_nofault(p, (char *)req->oldptr +
+ origidx, i);
+ } else
+ error = copyout(p, (char *)req->oldptr + origidx, i);
+ if (error != 0)
+ return (error);
}
- if (error)
- return (error);
if (i < l)
return (ENOMEM);
return (0);
@@ -1322,37 +1334,43 @@ int
sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid,
int *nindx, struct sysctl_req *req)
{
+ struct sysctl_oid_list *lsp;
struct sysctl_oid *oid;
int indx;
- SYSCTL_ASSERT_LOCKED();
- oid = SLIST_FIRST(&sysctl__children);
+ SYSCTL_ASSERT_XLOCKED();
+ lsp = &sysctl__children;
indx = 0;
- while (oid && indx < CTL_MAXNAME) {
- if (oid->oid_number == name[indx]) {
- indx++;
- if (oid->oid_kind & CTLFLAG_NOLOCK)
- req->lock = REQ_UNLOCKED;
- if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
- if (oid->oid_handler != NULL ||
- indx == namelen) {
- *noid = oid;
- if (nindx != NULL)
- *nindx = indx;
- return (0);
- }
- oid = SLIST_FIRST(
- (struct sysctl_oid_list *)oid->oid_arg1);
- } else if (indx == namelen) {
+ while (indx < CTL_MAXNAME) {
+ SLIST_FOREACH(oid, lsp, oid_link) {
+ if (oid->oid_number == name[indx])
+ break;
+ }
+ if (oid == NULL)
+ return (ENOENT);
+
+ indx++;
+ if (oid->oid_kind & CTLFLAG_NOLOCK)
+ req->lock = REQ_UNLOCKED;
+ if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) {
+ if (oid->oid_handler != NULL || indx == namelen) {
*noid = oid;
if (nindx != NULL)
*nindx = indx;
+ KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
+ ("%s found DYING node %p", __func__, oid));
return (0);
- } else {
- return (ENOTDIR);
}
+ lsp = SYSCTL_CHILDREN(oid);
+ } else if (indx == namelen) {
+ *noid = oid;
+ if (nindx != NULL)
+ *nindx = indx;
+ KASSERT((oid->oid_kind & CTLFLAG_DYING) == 0,
+ ("%s found DYING node %p", __func__, oid));
+ return (0);
} else {
- oid = SLIST_NEXT(oid, oid_link);
+ return (ENOTDIR);
}
}
return (ENOENT);
@@ -1369,7 +1387,7 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
struct sysctl_oid *oid;
int error, indx, lvl;
- SYSCTL_ASSERT_LOCKED();
+ SYSCTL_ASSERT_XLOCKED();
error = sysctl_find_oid(arg1, arg2, &oid, &indx, req);
if (error)
@@ -1437,12 +1455,23 @@ sysctl_root(SYSCTL_HANDLER_ARGS)
if (error != 0)
return (error);
#endif
+ oid->oid_running++;
+ SYSCTL_XUNLOCK();
+
if (!(oid->oid_kind & CTLFLAG_MPSAFE))
mtx_lock(&Giant);
error = oid->oid_handler(oid, arg1, arg2, req);
if (!(oid->oid_kind & CTLFLAG_MPSAFE))
mtx_unlock(&Giant);
+#ifndef __rtems__
+ KFAIL_POINT_ERROR(_debug_fail_point, sysctl_running, error);
+#endif /* __rtems__ */
+
+ SYSCTL_XLOCK();
+ oid->oid_running--;
+ if (oid->oid_running == 0 && (oid->oid_kind & CTLFLAG_DYING) != 0)
+ wakeup(&oid->oid_running);
return (error);
}
@@ -1543,9 +1572,9 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
for (;;) {
req.oldidx = 0;
req.newidx = 0;
- SYSCTL_SLOCK();
+ SYSCTL_XLOCK();
error = sysctl_root(0, name, namelen, &req);
- SYSCTL_SUNLOCK();
+ SYSCTL_XUNLOCK();
if (error != EAGAIN)
break;
uio_yield();
@@ -1569,4 +1598,29 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old,
}
return (error);
}
+
+/*
+ * Drain into a sysctl struct. The user buffer should be wired if a page
+ * fault would cause an issue.
+ */
+static int
+sbuf_sysctl_drain(void *arg, const char *data, int len)
+{
+ struct sysctl_req *req = arg;
+ int error;
+
+ error = SYSCTL_OUT(req, data, len);
+ KASSERT(error >= 0, ("Got unexpected negative value %d", error));
+ return (error == 0 ? len : -error);
+}
+
+struct sbuf *
+sbuf_new_for_sysctl(struct sbuf *s, char *buf, int length,
+ struct sysctl_req *req)
+{
+
+ s = sbuf_new(s, buf, length, SBUF_FIXEDLEN);
+ sbuf_set_drain(s, sbuf_sysctl_drain, req);
+ return (s);
+}
#endif /* __rtems__ */
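
Taken together, the arg2 handling in sysctl_handle_long()/sysctl_handle_quad() and the new sbuf_new_for_sysctl() drain suggest the handler patterns sketched below. This is a sketch only; the node names, the widget data, and the buffer size hint are assumptions, and a production handler would normally wire the request buffer before draining into it.

/* A read-only constant exported through arg2, with no backing variable. */
SYSCTL_LONG(_kern, OID_AUTO, maxwidgets, CTLFLAG_RD, NULL, 4,
    "Hypothetical constant; writes now fail with EPERM");

static struct {
	unsigned int w_count;
} widgets[4];				/* hypothetical data to dump */

static int
sysctl_dump_widgets(SYSCTL_HANDLER_ARGS)
{
	struct sbuf sb;
	int error, i;

	/* Stream output straight into the request via the sbuf drain. */
	sbuf_new_for_sysctl(&sb, NULL, 128, req);
	for (i = 0; i < 4; i++)
		sbuf_printf(&sb, "widget%d: %u\n", i, widgets[i].w_count);
	error = sbuf_finish(&sb);
	sbuf_delete(&sb);
	return (error);
}
SYSCTL_PROC(_kern, OID_AUTO, widgets, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0,
    sysctl_dump_widgets, "A", "Hypothetical widget dump");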
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c
index a7fe5d59..73cbd571 100644
--- a/freebsd/sys/kern/kern_timeout.c
+++ b/freebsd/sys/kern/kern_timeout.c
@@ -58,14 +58,18 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/smp.h>
+#ifdef SMP
+#include <machine/cpu.h>
+#endif
+
#ifdef __rtems__
int ncallout = 16;
#endif /* __rtems__ */
SDT_PROVIDER_DEFINE(callout_execute);
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start);
+SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
"struct callout *");
-SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end);
+SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
"struct callout *");
@@ -88,6 +92,21 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
int callwheelsize, callwheelbits, callwheelmask;
/*
+ * The callout cpu migration entity represents the information necessary to
+ * describe a callout that is migrating to a new callout cpu.
+ * This cached information is essential for deferring migration when
+ * the migrating callout is already running.
+ */
+struct cc_mig_ent {
+#ifdef SMP
+ void (*ce_migration_func)(void *);
+ void *ce_migration_arg;
+ int ce_migration_cpu;
+ int ce_migration_ticks;
+#endif
+};
+
+/*
* There is one struct callout_cpu per cpu, holding all relevant
* state for the callout processing thread on the individual CPU.
* In particular:
@@ -105,6 +124,7 @@ int callwheelsize, callwheelbits, callwheelmask;
* when the callout should be served.
*/
struct callout_cpu {
+ struct cc_mig_ent cc_migrating_entity;
struct mtx cc_lock;
struct callout *cc_callout;
struct callout_tailq *cc_callwheel;
@@ -119,7 +139,13 @@ struct callout_cpu {
};
#ifdef SMP
+#define cc_migration_func cc_migrating_entity.ce_migration_func
+#define cc_migration_arg cc_migrating_entity.ce_migration_arg
+#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
+#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks
+
struct callout_cpu cc_cpu[MAXCPU];
+#define CPUBLOCK MAXCPU
#define CC_CPU(cpu) (&cc_cpu[(cpu)])
#define CC_SELF() CC_CPU(PCPU_GET(cpuid))
#else
@@ -129,6 +155,7 @@ struct callout_cpu cc_cpu;
#endif
#define CC_LOCK(cc) mtx_lock_spin(&(cc)->cc_lock)
#define CC_UNLOCK(cc) mtx_unlock_spin(&(cc)->cc_lock)
+#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
static int timeout_cpu;
@@ -152,6 +179,35 @@ MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
*/
/*
+ * Resets the migration entity tied to a specific callout cpu.
+ */
+static void
+cc_cme_cleanup(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+ cc->cc_migration_cpu = CPUBLOCK;
+ cc->cc_migration_ticks = 0;
+ cc->cc_migration_func = NULL;
+ cc->cc_migration_arg = NULL;
+#endif
+}
+
+/*
+ * Checks if migration is requested by a specific callout cpu.
+ */
+static int
+cc_cme_migrating(struct callout_cpu *cc)
+{
+
+#ifdef SMP
+ return (cc->cc_migration_cpu != CPUBLOCK);
+#else
+ return (0);
+#endif
+}
+
+/*
* kern_timeout_callwheel_alloc() - kernel low level callwheel initialization
*
* This code is called very early in the kernel initialization sequence,
@@ -237,6 +293,7 @@ callout_cpu_init(struct callout_cpu *cc)
for (i = 0; i < callwheelsize; i++) {
TAILQ_INIT(&cc->cc_callwheel[i]);
}
+ cc_cme_cleanup(cc);
if (cc->cc_callout == NULL)
return;
for (i = 0; i < ncallout; i++) {
@@ -247,6 +304,36 @@ callout_cpu_init(struct callout_cpu *cc)
}
}
+#ifdef SMP
+/*
+ * Switches the cpu tied to a specific callout.
+ * The function expects the incoming callout cpu to be locked and returns
+ * with the outgoing callout cpu locked.
+ */
+static struct callout_cpu *
+callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu)
+{
+ struct callout_cpu *new_cc;
+
+ MPASS(c != NULL && cc != NULL);
+ CC_LOCK_ASSERT(cc);
+
+ /*
+ * Avoid interrupts and preemption firing after the callout cpu
+ * is blocked in order to avoid deadlocks, as the new thread
+ * may try to acquire the callout cpu lock.
+ */
+ c->c_cpu = CPUBLOCK;
+ spinlock_enter();
+ CC_UNLOCK(cc);
+ new_cc = CC_CPU(new_cpu);
+ CC_LOCK(new_cc);
+ spinlock_exit();
+ c->c_cpu = new_cpu;
+ return (new_cc);
+}
+#endif
+
#ifndef __rtems__
/*
* kern_timeout_callwheel_init() - initialize previously reserved callwheel
@@ -281,11 +368,9 @@ start_softclock(void *dummy)
panic("died while creating standard software ithreads");
cc->cc_cookie = softclock_ih;
#ifdef SMP
- for (cpu = 0; cpu <= mp_maxid; cpu++) {
+ CPU_FOREACH(cpu) {
if (cpu == timeout_cpu)
continue;
- if (CPU_ABSENT(cpu))
- continue;
cc = CC_CPU(cpu);
if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK,
INTR_MPSAFE, &cc->cc_cookie))
@@ -340,6 +425,13 @@ callout_lock(struct callout *c)
for (;;) {
cpu = c->c_cpu;
+#ifdef SMP
+ if (cpu == CPUBLOCK) {
+ while (c->c_cpu == CPUBLOCK)
+ cpu_spinwait();
+ continue;
+ }
+#endif
cc = CC_CPU(cpu);
CC_LOCK(cc);
if (cpu == c->c_cpu)
@@ -349,6 +441,202 @@ callout_lock(struct callout *c)
return (cc);
}
+static void
+callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
+ void (*func)(void *), void *arg, int cpu)
+{
+
+ CC_LOCK_ASSERT(cc);
+
+ if (to_ticks <= 0)
+ to_ticks = 1;
+ c->c_arg = arg;
+ c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+ c->c_func = func;
+ c->c_time = cc->cc_ticks + to_ticks;
+ TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
+ c, c_links.tqe);
+}
+
+static void
+callout_cc_del(struct callout *c, struct callout_cpu *cc)
+{
+
+ if (cc->cc_next == c)
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+ }
+}
+
+static struct callout *
+softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
+ int *lockcalls, int *gcalls)
+{
+ void (*c_func)(void *);
+ void *c_arg;
+ struct lock_class *class;
+ struct lock_object *c_lock;
+ int c_flags, sharedlock;
+#ifdef SMP
+ struct callout_cpu *new_cc;
+ void (*new_func)(void *);
+ void *new_arg;
+ int new_cpu, new_ticks;
+#endif
+#ifdef DIAGNOSTIC
+ struct bintime bt1, bt2;
+ struct timespec ts2;
+ static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
+ static timeout_t *lastfunc;
+#endif
+
+ cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
+ class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL;
+ sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1;
+ c_lock = c->c_lock;
+ c_func = c->c_func;
+ c_arg = c->c_arg;
+ c_flags = c->c_flags;
+ if (c->c_flags & CALLOUT_LOCAL_ALLOC)
+ c->c_flags = CALLOUT_LOCAL_ALLOC;
+ else
+ c->c_flags &= ~CALLOUT_PENDING;
+ cc->cc_curr = c;
+ cc->cc_cancel = 0;
+ CC_UNLOCK(cc);
+ if (c_lock != NULL) {
+ class->lc_lock(c_lock, sharedlock);
+ /*
+ * The callout may have been cancelled
+ * while we switched locks.
+ */
+ if (cc->cc_cancel) {
+ class->lc_unlock(c_lock);
+ goto skip;
+ }
+ /* The callout cannot be stopped now. */
+ cc->cc_cancel = 1;
+
+ if (c_lock == &Giant.lock_object) {
+ (*gcalls)++;
+ CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+ c, c_func, c_arg);
+ } else {
+ (*lockcalls)++;
+ CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
+ c, c_func, c_arg);
+ }
+ } else {
+ (*mpcalls)++;
+ CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+ c, c_func, c_arg);
+ }
+#ifdef DIAGNOSTIC
+ binuptime(&bt1);
+#endif
+#ifndef __rtems__
+ THREAD_NO_SLEEPING();
+ SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
+#endif /* __rtems__ */
+ c_func(c_arg);
+#ifndef __rtems__
+ SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
+ THREAD_SLEEPING_OK();
+#endif /* __rtems__ */
+#ifdef DIAGNOSTIC
+ binuptime(&bt2);
+ bintime_sub(&bt2, &bt1);
+ if (bt2.frac > maxdt) {
+ if (lastfunc != c_func || bt2.frac > maxdt * 2) {
+ bintime2timespec(&bt2, &ts2);
+ printf(
+ "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
+ c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
+ }
+ maxdt = bt2.frac;
+ lastfunc = c_func;
+ }
+#endif
+ CTR1(KTR_CALLOUT, "callout %p finished", c);
+ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
+ class->lc_unlock(c_lock);
+skip:
+ CC_LOCK(cc);
+ /*
+ * If the current callout is locally allocated (from
+ * timeout(9)) then put it on the freelist.
+ *
+ * Note: we need to check the cached copy of c_flags because
+ * if it was not local, then it's not safe to deref the
+ * callout pointer.
+ */
+ if (c_flags & CALLOUT_LOCAL_ALLOC) {
+ KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC,
+ ("corrupted callout"));
+ c->c_func = NULL;
+ SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
+ }
+ cc->cc_curr = NULL;
+ if (cc->cc_waiting) {
+ /*
+ * There is someone waiting for the
+ * callout to complete.
+ * If the callout was scheduled for
+ * migration just cancel it.
+ */
+ if (cc_cme_migrating(cc))
+ cc_cme_cleanup(cc);
+ cc->cc_waiting = 0;
+ CC_UNLOCK(cc);
+ wakeup(&cc->cc_waiting);
+ CC_LOCK(cc);
+ } else if (cc_cme_migrating(cc)) {
+#ifdef SMP
+ /*
+ * If the callout was scheduled for
+ * migration just perform it now.
+ */
+ new_cpu = cc->cc_migration_cpu;
+ new_ticks = cc->cc_migration_ticks;
+ new_func = cc->cc_migration_func;
+ new_arg = cc->cc_migration_arg;
+ cc_cme_cleanup(cc);
+
+ /*
+ * Handle deferred callout stops
+ */
+ if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) {
+ CTR3(KTR_CALLOUT,
+ "deferred cancelled %p func %p arg %p",
+ c, new_func, new_arg);
+ callout_cc_del(c, cc);
+ goto nextc;
+ }
+
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+
+ /*
+ * It should be asserted here that the
+ * callout is not destroyed, but that
+ * is not easy to do.
+ */
+ new_cc = callout_cpu_switch(c, cc, new_cpu);
+ callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
+ new_cpu);
+ CC_UNLOCK(new_cc);
+ CC_LOCK(cc);
+#else
+ panic("migration should not happen");
+#endif
+ }
+#ifdef SMP
+nextc:
+#endif
+ return (cc->cc_next);
+}
+
/*
* The callout mechanism is based on the work of Adam M. Costello and
* George Varghese, published in a technical report entitled "Redesigning
@@ -377,12 +665,6 @@ softclock(void *arg)
int mpcalls;
int lockcalls;
int gcalls;
-#ifdef DIAGNOSTIC
- struct bintime bt1, bt2;
- struct timespec ts2;
- static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
- static timeout_t *lastfunc;
-#endif
#ifndef MAX_SOFTCLOCK_STEPS
#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
@@ -404,7 +686,7 @@ softclock(void *arg)
cc->cc_softticks++;
bucket = &cc->cc_callwheel[curticks & callwheelmask];
c = TAILQ_FIRST(bucket);
- while (c) {
+ while (c != NULL) {
depth++;
if (c->c_time != curticks) {
c = TAILQ_NEXT(c, c_links.tqe);
@@ -419,128 +701,10 @@ softclock(void *arg)
steps = 0;
}
} else {
- void (*c_func)(void *);
- void *c_arg;
- struct lock_class *class;
- struct lock_object *c_lock;
- int c_flags, sharedlock;
-
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
TAILQ_REMOVE(bucket, c, c_links.tqe);
- class = (c->c_lock != NULL) ?
- LOCK_CLASS(c->c_lock) : NULL;
- sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ?
- 0 : 1;
- c_lock = c->c_lock;
- c_func = c->c_func;
- c_arg = c->c_arg;
- c_flags = c->c_flags;
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_flags = CALLOUT_LOCAL_ALLOC;
- } else {
- c->c_flags =
- (c->c_flags & ~CALLOUT_PENDING);
- }
- cc->cc_curr = c;
- cc->cc_cancel = 0;
- CC_UNLOCK(cc);
- if (c_lock != NULL) {
- class->lc_lock(c_lock, sharedlock);
- /*
- * The callout may have been cancelled
- * while we switched locks.
- */
- if (cc->cc_cancel) {
- class->lc_unlock(c_lock);
- goto skip;
- }
- /* The callout cannot be stopped now. */
- cc->cc_cancel = 1;
-
- if (c_lock == &Giant.lock_object) {
- gcalls++;
- CTR3(KTR_CALLOUT,
- "callout %p func %p arg %p",
- c, c_func, c_arg);
- } else {
- lockcalls++;
- CTR3(KTR_CALLOUT, "callout lock"
- " %p func %p arg %p",
- c, c_func, c_arg);
- }
- } else {
- mpcalls++;
- CTR3(KTR_CALLOUT,
- "callout mpsafe %p func %p arg %p",
- c, c_func, c_arg);
- }
-#ifdef DIAGNOSTIC
- binuptime(&bt1);
-#endif
-#ifndef __rtems__
- THREAD_NO_SLEEPING();
- SDT_PROBE(callout_execute, kernel, ,
- callout_start, c, 0, 0, 0, 0);
-#endif /* __rtems__ */
- c_func(c_arg);
-#ifndef __rtems__
- SDT_PROBE(callout_execute, kernel, ,
- callout_end, c, 0, 0, 0, 0);
- THREAD_SLEEPING_OK();
-#endif /* __rtems__ */
-#ifdef DIAGNOSTIC
- binuptime(&bt2);
- bintime_sub(&bt2, &bt1);
- if (bt2.frac > maxdt) {
- if (lastfunc != c_func ||
- bt2.frac > maxdt * 2) {
- bintime2timespec(&bt2, &ts2);
- printf(
- "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
- c_func, c_arg,
- (intmax_t)ts2.tv_sec,
- ts2.tv_nsec);
- }
- maxdt = bt2.frac;
- lastfunc = c_func;
- }
-#endif
- CTR1(KTR_CALLOUT, "callout %p finished", c);
- if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0)
- class->lc_unlock(c_lock);
- skip:
- CC_LOCK(cc);
- /*
- * If the current callout is locally
- * allocated (from timeout(9))
- * then put it on the freelist.
- *
- * Note: we need to check the cached
- * copy of c_flags because if it was not
- * local, then it's not safe to deref the
- * callout pointer.
- */
- if (c_flags & CALLOUT_LOCAL_ALLOC) {
- KASSERT(c->c_flags ==
- CALLOUT_LOCAL_ALLOC,
- ("corrupted callout"));
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c,
- c_links.sle);
- }
- cc->cc_curr = NULL;
- if (cc->cc_waiting) {
- /*
- * There is someone waiting
- * for the callout to complete.
- */
- cc->cc_waiting = 0;
- CC_UNLOCK(cc);
- wakeup(&cc->cc_waiting);
- CC_LOCK(cc);
- }
+ c = softclock_call_cc(c, cc, &mpcalls,
+ &lockcalls, &gcalls);
steps = 0;
- c = cc->cc_next;
}
}
}
@@ -650,7 +814,6 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
*/
if (c->c_flags & CALLOUT_LOCAL_ALLOC)
cpu = c->c_cpu;
-retry:
cc = callout_lock(c);
if (cc->cc_curr == c) {
/*
@@ -682,25 +845,31 @@ retry:
cancelled = 1;
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
}
+
+#ifdef SMP
/*
- * If the lock must migrate we have to check the state again as
- * we can't hold both the new and old locks simultaneously.
+ * If the callout must migrate, try to perform the migration immediately.
+ * If the callout is currently running, just defer the migration
+ * to a more appropriate moment.
*/
if (c->c_cpu != cpu) {
- c->c_cpu = cpu;
- CC_UNLOCK(cc);
- goto retry;
+ if (cc->cc_curr == c) {
+ cc->cc_migration_cpu = cpu;
+ cc->cc_migration_ticks = to_ticks;
+ cc->cc_migration_func = ftn;
+ cc->cc_migration_arg = arg;
+ c->c_flags |= CALLOUT_DFRMIGRATION;
+ CTR5(KTR_CALLOUT,
+ "migration of %p func %p arg %p in %d to %u deferred",
+ c, c->c_func, c->c_arg, to_ticks, cpu);
+ CC_UNLOCK(cc);
+ return (cancelled);
+ }
+ cc = callout_cpu_switch(c, cc, cpu);
}
+#endif
- if (to_ticks <= 0)
- to_ticks = 1;
-
- c->c_arg = arg;
- c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
- c->c_func = ftn;
- c->c_time = cc->cc_ticks + to_ticks;
- TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
- c, c_links.tqe);
+ callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
CC_UNLOCK(cc);
@@ -728,13 +897,13 @@ _callout_stop_safe(c, safe)
struct callout *c;
int safe;
{
- struct callout_cpu *cc;
+ struct callout_cpu *cc, *old_cc;
struct lock_class *class;
#ifndef __rtems__
int use_lock, sq_locked;
#else /* __rtems__ */
int use_lock;
-#endif /* __rtems__ */
+#endif /* __rtems__ */
/*
* Some old subsystems don't hold Giant while running a callout_stop(),
@@ -753,9 +922,30 @@ _callout_stop_safe(c, safe)
#ifndef __rtems__
sq_locked = 0;
+ old_cc = NULL;
again:
-#endif /* __rtems__ */
+#endif /* __rtems__ */
cc = callout_lock(c);
+
+#ifndef __rtems__
+ /*
+ * If the callout was migrating while the callout cpu lock was
+ * dropped, just drop the sleepqueue lock and check the states
+ * again.
+ */
+ if (sq_locked != 0 && cc != old_cc) {
+#ifdef SMP
+ CC_UNLOCK(cc);
+ sleepq_release(&old_cc->cc_waiting);
+ sq_locked = 0;
+ old_cc = NULL;
+ goto again;
+#else
+ panic("migration should not happen");
+#endif
+ }
+#endif /* __rtems__ */
+
/*
* If the callout isn't pending, it's not on the queue, so
* don't attempt to remove it from the queue. We can try to
@@ -775,7 +965,7 @@ again:
#ifndef __rtems__
if (sq_locked)
sleepq_release(&cc->cc_waiting);
-#endif /* __rtems__ */
+#endif /* __rtems__ */
return (0);
}
@@ -810,8 +1000,16 @@ again:
CC_UNLOCK(cc);
sleepq_lock(&cc->cc_waiting);
sq_locked = 1;
+ old_cc = cc;
goto again;
}
+
+ /*
+ * Migration could be cancelled here, but
+ * since it is not yet certain when the
+ * callout will be picked up again, just let
+ * softclock() take care of it.
+ */
cc->cc_waiting = 1;
DROP_GIANT();
CC_UNLOCK(cc);
@@ -820,6 +1018,7 @@ again:
SLEEPQ_SLEEP, 0);
sleepq_wait(&cc->cc_waiting, 0);
sq_locked = 0;
+ old_cc = NULL;
/* Reacquire locks previously released. */
PICKUP_GIANT();
@@ -847,9 +1046,17 @@ again:
cc->cc_cancel = 1;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
+ KASSERT(!cc_cme_migrating(cc),
+ ("callout wrongly scheduled for migration"));
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
return (1);
+ } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) {
+ c->c_flags &= ~CALLOUT_DFRMIGRATION;
+ CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p",
+ c, c->c_func, c->c_arg);
+ CC_UNLOCK(cc);
+ return (1);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
@@ -860,23 +1067,16 @@ again:
#ifndef __rtems__
if (sq_locked)
sleepq_release(&cc->cc_waiting);
-#endif /* __rtems__ */
+#endif /* __rtems__ */
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
- if (cc->cc_next == c) {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- }
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
-
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
+ TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
+ c_links.tqe);
+ callout_cc_del(c, cc);
- if (c->c_flags & CALLOUT_LOCAL_ALLOC) {
- c->c_func = NULL;
- SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle);
- }
CC_UNLOCK(cc);
return (1);
}
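
From a consumer's point of view the deferred-migration machinery stays behind the existing callout KPI: a periodic callout can simply keep asking for its preferred cpu, and the migration happens once any in-progress handler returns. A minimal sketch, assuming a hypothetical softc layout and a one-second period:

struct mydev_softc {			/* assumed driver state */
	struct mtx	sc_mtx;
	struct callout	sc_tick;
	int		sc_cpu;		/* preferred cpu for the callout */
};

static void
mydev_tick(void *arg)
{
	struct mydev_softc *sc = arg;

	/* ... periodic work, sc_mtx is held by the callout code ... */

	/*
	 * Re-arm on the preferred cpu.  If this handler is still the
	 * running callout, the request is recorded in the callout cpu
	 * and performed after the handler returns instead of retrying
	 * with both cpu locks.
	 */
	callout_reset_on(&sc->sc_tick, hz, mydev_tick, sc, sc->sc_cpu);
}

static void
mydev_start(struct mydev_softc *sc)
{

	mtx_init(&sc->sc_mtx, "mydev", NULL, MTX_DEF);
	callout_init_mtx(&sc->sc_tick, &sc->sc_mtx, 0);
	callout_reset_on(&sc->sc_tick, hz, mydev_tick, sc, sc->sc_cpu);
}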
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index 51717b49..78a803e0 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -430,8 +430,8 @@ static void
devinit(void)
{
#ifndef __rtems__
- devctl_dev = make_dev(&dev_cdevsw, 0, UID_ROOT, GID_WHEEL, 0600,
- "devctl");
+ devctl_dev = make_dev_credf(MAKEDEV_ETERNAL, &dev_cdevsw, 0, NULL,
+ UID_ROOT, GID_WHEEL, 0600, "devctl");
mtx_init(&devsoftc.mtx, "dev mtx", "devd", MTX_DEF);
cv_init(&devsoftc.cv, "dev cv");
TAILQ_INIT(&devsoftc.devq);
@@ -1058,10 +1058,12 @@ devclass_find(const char *classname)
* is called by devclass_add_driver to accomplish the recursive
* notification of all the children classes of dc, as well as dc.
* Each layer will have BUS_DRIVER_ADDED() called for all instances of
- * the devclass. We do a full search here of the devclass list at
- * each iteration level to save storing children-lists in the devclass
- * structure. If we ever move beyond a few dozen devices doing this,
- * we may need to reevaluate...
+ * the devclass.
+ *
+ * We do a full search here of the devclass list at each iteration
+ * level to save storing children-lists in the devclass structure. If
+ * we ever move beyond a few dozen devices doing this, we may need to
+ * reevaluate...
*
* @param dc the devclass to edit
* @param driver the driver that was just added
@@ -1156,6 +1158,77 @@ devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp)
}
/**
+ * @brief Register that a device driver has been deleted from a devclass
+ *
+ * Register that a device driver has been removed from a devclass.
+ * This is called by devclass_delete_driver to accomplish the
+ * recursive notification of all the children classes of busclass, as
+ * well as busclass. Each layer will attempt to detach the driver
+ * from any devices that are children of the bus's devclass. The function
+ * will return an error if a device fails to detach.
+ *
+ * We do a full search here of the devclass list at each iteration
+ * level to save storing children-lists in the devclass structure. If
+ * we ever move beyond a few dozen devices doing this, we may need to
+ * reevaluate...
+ *
+ * @param busclass the devclass of the parent bus
+ * @param dc the devclass of the driver being deleted
+ * @param driver the driver being deleted
+ */
+static int
+devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
+{
+ devclass_t parent;
+ device_t dev;
+ int error, i;
+
+ /*
+ * Disassociate from any devices. We iterate through all the
+ * devices in the devclass of the driver and detach any which are
+ * using the driver and which have a parent in the devclass which
+ * we are deleting from.
+ *
+ * Note that since a driver can be in multiple devclasses, we
+ * should not detach devices which are not children of devices in
+ * the affected devclass.
+ */
+ for (i = 0; i < dc->maxunit; i++) {
+ if (dc->devices[i]) {
+ dev = dc->devices[i];
+ if (dev->driver == driver && dev->parent &&
+ dev->parent->devclass == busclass) {
+ if ((error = device_detach(dev)) != 0)
+ return (error);
+ BUS_PROBE_NOMATCH(dev->parent, dev);
+ devnomatch(dev);
+ dev->flags |= DF_DONENOMATCH;
+ }
+ }
+ }
+
+ /*
+ * Walk through the children classes. Since we only keep a
+ * single parent pointer around, we walk the entire list of
+ * devclasses looking for children. We set the
+ * DC_HAS_CHILDREN flag when a child devclass is created on
+ * the parent, so we only walk the list for those devclasses
+ * that have children.
+ */
+ if (!(busclass->flags & DC_HAS_CHILDREN))
+ return (0);
+ parent = busclass;
+ TAILQ_FOREACH(busclass, &devclasses, link) {
+ if (busclass->parent == parent) {
+ error = devclass_driver_deleted(busclass, dc, driver);
+ if (error)
+ return (error);
+ }
+ }
+ return (0);
+}
+
+/**
* @brief Delete a device driver from a device class
*
* Delete a device driver from a devclass. This is normally called
@@ -1174,8 +1247,6 @@ devclass_delete_driver(devclass_t busclass, driver_t *driver)
{
devclass_t dc = devclass_find(driver->name);
driverlink_t dl;
- device_t dev;
- int i;
int error;
PDEBUG(("%s from devclass %s", driver->name, DEVCLANAME(busclass)));
@@ -1197,27 +1268,9 @@ devclass_delete_driver(devclass_t busclass, driver_t *driver)
return (ENOENT);
}
- /*
- * Disassociate from any devices. We iterate through all the
- * devices in the devclass of the driver and detach any which are
- * using the driver and which have a parent in the devclass which
- * we are deleting from.
- *
- * Note that since a driver can be in multiple devclasses, we
- * should not detach devices which are not children of devices in
- * the affected devclass.
- */
- for (i = 0; i < dc->maxunit; i++) {
- if (dc->devices[i]) {
- dev = dc->devices[i];
- if (dev->driver == driver && dev->parent &&
- dev->parent->devclass == busclass) {
- if ((error = device_detach(dev)) != 0)
- return (error);
- device_set_driver(dev, NULL);
- }
- }
- }
+ error = devclass_driver_deleted(busclass, dc, driver);
+ if (error != 0)
+ return (error);
TAILQ_REMOVE(&busclass->drivers, dl, link);
free(dl, M_BUS);
@@ -1889,7 +1942,7 @@ device_delete_child(device_t dev, device_t child)
PDEBUG(("%s from %s", DEVICENAME(child), DEVICENAME(dev)));
/* remove children first */
- while ( (grandchild = TAILQ_FIRST(&child->children)) ) {
+ while ((grandchild = TAILQ_FIRST(&child->children)) != NULL) {
error = device_delete_child(child, grandchild);
if (error)
return (error);
@@ -1908,6 +1961,39 @@ device_delete_child(device_t dev, device_t child)
}
/**
+ * @brief Delete all child devices of the given device, if any.
+ *
+ * This function deletes all child devices of the given device, if
+ * any, using the device_delete_child() function for each device it
+ * finds. If a child device cannot be deleted, this function will
+ * return an error code.
+ *
+ * @param dev the parent device
+ *
+ * @retval 0 success
+ * @retval non-zero a device would not detach
+ */
+int
+device_delete_children(device_t dev)
+{
+ device_t child;
+ int error;
+
+ PDEBUG(("Deleting all children of %s", DEVICENAME(dev)));
+
+ error = 0;
+
+ while ((child = TAILQ_FIRST(&dev->children)) != NULL) {
+ error = device_delete_child(dev, child);
+ if (error) {
+ PDEBUG(("Failed deleting %s", DEVICENAME(child)));
+ break;
+ }
+ }
+ return (error);
+}
+
+/**
* @brief Find a device given a unit number
*
* This is similar to devclass_get_devices() but only searches for
@@ -2001,19 +2087,23 @@ device_probe_child(device_t dev, device_t child)
for (dl = first_matching_driver(dc, child);
dl;
dl = next_matching_driver(dc, child, dl)) {
-
/* If this driver's pass is too high, then ignore it. */
if (dl->pass > bus_current_pass)
continue;
PDEBUG(("Trying %s", DRIVERNAME(dl->driver)));
- device_set_driver(child, dl->driver);
+ result = device_set_driver(child, dl->driver);
+ if (result == ENOMEM)
+ return (result);
+ else if (result != 0)
+ continue;
if (!hasclass) {
- if (device_set_devclass(child, dl->driver->name)) {
- printf("driver bug: Unable to set devclass (devname: %s)\n",
- (child ? device_get_name(child) :
- "no device"));
- device_set_driver(child, NULL);
+ if (device_set_devclass(child,
+ dl->driver->name) != 0) {
+ printf("driver bug: Unable to set "
+ "devclass (devname: %s)\n",
+ device_get_name(child));
+ (void)device_set_driver(child, NULL);
continue;
}
}
@@ -2029,7 +2119,7 @@ device_probe_child(device_t dev, device_t child)
/* Reset flags and devclass before the next probe. */
child->devflags = 0;
if (!hasclass)
- device_set_devclass(child, NULL);
+ (void)device_set_devclass(child, NULL);
/*
* If the driver returns SUCCESS, there can be
@@ -2046,7 +2136,7 @@ device_probe_child(device_t dev, device_t child)
* certainly doesn't match.
*/
if (result > 0) {
- device_set_driver(child, NULL);
+ (void)device_set_driver(child, NULL);
continue;
}
@@ -2083,7 +2173,7 @@ device_probe_child(device_t dev, device_t child)
/* XXX What happens if we rebid and got no best? */
if (best) {
/*
- * If this device was atached, and we were asked to
+ * If this device was attached, and we were asked to
* rescan, and it is a different driver, then we have
* to detach the old driver and reattach this new one.
* Note, we don't have to check for DF_REBID here
@@ -2109,7 +2199,9 @@ device_probe_child(device_t dev, device_t child)
if (result != 0)
return (result);
}
- device_set_driver(child, best->driver);
+ result = device_set_driver(child, best->driver);
+ if (result != 0)
+ return (result);
#ifndef __rtems__
resource_int_value(best->driver->name, child->unit,
"flags", &child->devflags);
@@ -2171,6 +2263,11 @@ device_get_children(device_t dev, device_t **devlistp, int *devcountp)
TAILQ_FOREACH(child, &dev->children, link) {
count++;
}
+ if (count == 0) {
+ *devlistp = NULL;
+ *devcountp = 0;
+ return (0);
+ }
#ifdef __rtems__
/* malloc(0) may return NULL */
@@ -2471,12 +2568,13 @@ device_disable(device_t dev)
void
device_busy(device_t dev)
{
- if (dev->state < DS_ATTACHED)
+ if (dev->state < DS_ATTACHING)
panic("device_busy: called for unattached device");
if (dev->busy == 0 && dev->parent)
device_busy(dev->parent);
dev->busy++;
- dev->state = DS_BUSY;
+ if (dev->state == DS_ATTACHED)
+ dev->state = DS_BUSY;
}
/**
@@ -2485,14 +2583,16 @@ device_busy(device_t dev)
void
device_unbusy(device_t dev)
{
- if (dev->state != DS_BUSY)
+ if (dev->busy != 0 && dev->state != DS_BUSY &&
+ dev->state != DS_ATTACHING)
panic("device_unbusy: called for non-busy device %s",
device_get_nameunit(dev));
dev->busy--;
if (dev->busy == 0) {
if (dev->parent)
device_unbusy(dev->parent);
- dev->state = DS_ATTACHED;
+ if (dev->state == DS_BUSY)
+ dev->state = DS_ATTACHED;
}
}
@@ -2602,6 +2702,7 @@ device_set_driver(device_t dev, driver_t *driver)
free(dev->softc, M_BUS_SC);
dev->softc = NULL;
}
+ device_set_desc(dev, NULL);
kobj_delete((kobj_t) dev, NULL);
dev->driver = driver;
if (driver) {
@@ -2724,22 +2825,36 @@ device_attach(device_t dev)
{
int error;
+#ifndef __rtems__
+ if (resource_disabled(dev->driver->name, dev->unit)) {
+ device_disable(dev);
+ if (bootverbose)
+ device_printf(dev, "disabled via hints entry\n");
+ return (ENXIO);
+ }
+#endif /* __rtems__ */
+
device_sysctl_init(dev);
if (!device_is_quiet(dev))
device_print_child(dev->parent, dev);
+ dev->state = DS_ATTACHING;
if ((error = DEVICE_ATTACH(dev)) != 0) {
printf("device_attach: %s%d attach returned %d\n",
dev->driver->name, dev->unit, error);
- /* Unset the class; set in device_probe_child */
- if (dev->devclass == NULL)
- device_set_devclass(dev, NULL);
- device_set_driver(dev, NULL);
+ if (!(dev->flags & DF_FIXEDCLASS))
+ devclass_delete_device(dev->devclass, dev);
+ (void)device_set_driver(dev, NULL);
device_sysctl_fini(dev);
+ KASSERT(dev->busy == 0, ("attach failed but busy"));
dev->state = DS_NOTPRESENT;
return (error);
}
device_sysctl_update(dev);
- dev->state = DS_ATTACHED;
+ if (dev->busy)
+ dev->state = DS_BUSY;
+ else
+ dev->state = DS_ATTACHED;
+ dev->flags &= ~DF_DONENOMATCH;
devadded(dev);
return (0);
}
@@ -2785,8 +2900,7 @@ device_detach(device_t dev)
devclass_delete_device(dev->devclass, dev);
dev->state = DS_NOTPRESENT;
- device_set_driver(dev, NULL);
- device_set_desc(dev, NULL);
+ (void)device_set_driver(dev, NULL);
device_sysctl_fini(dev);
return (0);
@@ -3517,6 +3631,23 @@ bus_generic_teardown_intr(device_t dev, device_t child, struct resource *irq,
}
/**
+ * @brief Helper function for implementing BUS_ADJUST_RESOURCE().
+ *
+ * This simple implementation of BUS_ADJUST_RESOURCE() simply calls the
+ * BUS_ADJUST_RESOURCE() method of the parent of @p dev.
+ */
+int
+bus_generic_adjust_resource(device_t dev, device_t child, int type,
+ struct resource *r, u_long start, u_long end)
+{
+ /* Propagate up the bus hierarchy until someone handles it. */
+ if (dev->parent)
+ return (BUS_ADJUST_RESOURCE(dev->parent, child, type, r, start,
+ end));
+ return (EINVAL);
+}
+
+/**
* @brief Helper function for implementing BUS_ALLOC_RESOURCE().
*
* This simple implementation of BUS_ALLOC_RESOURCE() simply calls the
@@ -3839,6 +3970,21 @@ bus_alloc_resource(device_t dev, int type, int *rid, u_long start, u_long end,
}
/**
+ * @brief Wrapper function for BUS_ADJUST_RESOURCE().
+ *
+ * This function simply calls the BUS_ADJUST_RESOURCE() method of the
+ * parent of @p dev.
+ */
+int
+bus_adjust_resource(device_t dev, int type, struct resource *r, u_long start,
+ u_long end)
+{
+ if (dev->parent == NULL)
+ return (EINVAL);
+ return (BUS_ADJUST_RESOURCE(dev->parent, dev, type, r, start, end));
+}
+
+/**
* @brief Wrapper function for BUS_ACTIVATE_RESOURCE().
*
* This function simply calls the BUS_ACTIVATE_RESOURCE() method of the
@@ -4417,7 +4563,6 @@ print_driver(driver_t *driver, int indent)
print_driver_short(driver, indent);
}
-
static void
print_driver_list(driver_list_t drivers, int indent)
{
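
The new device_delete_children() helper folds the usual "delete every child, bail out if one refuses to detach" loop into one call. A bus driver's detach method might use it as sketched below; the resource teardown is only indicated, and the driver name is an assumption.

static int
mybus_detach(device_t dev)
{
	int error;

	/* Detach and delete every child; fail if any child will not detach. */
	error = device_delete_children(dev);
	if (error != 0)
		return (error);

	/* ... release the bus's own resources here ... */
	return (0);
}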
diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c
index e12a1f36..8d182173 100644
--- a/freebsd/sys/kern/subr_kobj.c
+++ b/freebsd/sys/kern/subr_kobj.c
@@ -62,18 +62,9 @@ static struct mtx kobj_mtx;
static int kobj_mutex_inited;
static int kobj_next_id = 1;
-/*
- * In the event that kobj_mtx has not been initialized yet,
- * we will ignore it, and run without locks in order to support
- * use of KOBJ before mutexes are available. This early in the boot
- * process, everything is single threaded and so races should not
- * happen. This is used to provide the PMAP layer on PowerPC, as well
- * as board support.
- */
-
-#define KOBJ_LOCK() if (kobj_mutex_inited) mtx_lock(&kobj_mtx);
-#define KOBJ_UNLOCK() if (kobj_mutex_inited) mtx_unlock(&kobj_mtx);
-#define KOBJ_ASSERT(what) if (kobj_mutex_inited) mtx_assert(&kobj_mtx,what);
+#define KOBJ_LOCK() mtx_lock(&kobj_mtx)
+#define KOBJ_UNLOCK() mtx_unlock(&kobj_mtx)
+#define KOBJ_ASSERT(what) mtx_assert(&kobj_mtx, what);
SYSCTL_UINT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD,
&kobj_next_id, 0, "");
@@ -106,28 +97,11 @@ kobj_error_method(void)
}
static void
-kobj_register_method(struct kobjop_desc *desc)
-{
- KOBJ_ASSERT(MA_OWNED);
-
- if (desc->id == 0) {
- desc->id = kobj_next_id++;
- }
-}
-
-static void
-kobj_unregister_method(struct kobjop_desc *desc)
-{
-}
-
-static void
kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops)
{
kobj_method_t *m;
int i;
- KOBJ_ASSERT(MA_OWNED);
-
/*
* Don't do anything if we are already compiled.
*/
@@ -137,8 +111,10 @@ kobj_class_compile_common(kobj_class_t cls, kobj_ops_t ops)
/*
* First register any methods which need it.
*/
- for (i = 0, m = cls->methods; m->desc; i++, m++)
- kobj_register_method(m->desc);
+ for (i = 0, m = cls->methods; m->desc; i++, m++) {
+ if (m->desc->id == 0)
+ m->desc->id = kobj_next_id++;
+ }
/*
* Then initialise the ops table.
@@ -161,7 +137,7 @@ kobj_class_compile(kobj_class_t cls)
*/
ops = malloc(sizeof(struct kobj_ops), M_KOBJ, M_NOWAIT);
if (!ops)
- panic("kobj_compile_methods: out of memory");
+ panic("%s: out of memory", __func__);
KOBJ_LOCK();
@@ -184,17 +160,14 @@ void
kobj_class_compile_static(kobj_class_t cls, kobj_ops_t ops)
{
- KOBJ_ASSERT(MA_NOTOWNED);
+ KASSERT(kobj_mutex_inited == 0,
+ ("%s: only supported during early cycles", __func__));
/*
* Increment refs to make sure that the ops table is not freed.
*/
- KOBJ_LOCK();
-
cls->refs++;
kobj_class_compile_common(cls, ops);
-
- KOBJ_UNLOCK();
}
static kobj_method_t*
@@ -261,8 +234,6 @@ kobj_lookup_method(kobj_class_t cls,
void
kobj_class_free(kobj_class_t cls)
{
- int i;
- kobj_method_t *m;
void* ops = NULL;
KOBJ_ASSERT(MA_NOTOWNED);
@@ -274,10 +245,9 @@ kobj_class_free(kobj_class_t cls)
*/
if (cls->refs == 0) {
/*
- * Unregister any methods which are no longer used.
+ * For now we don't do anything to unregister any methods
+ * which are no longer used.
*/
- for (i = 0, m = cls->methods; m->desc; i++, m++)
- kobj_unregister_method(m->desc);
/*
* Free memory and clean up.
@@ -310,6 +280,14 @@ kobj_create(kobj_class_t cls,
return obj;
}
+static void
+kobj_init_common(kobj_t obj, kobj_class_t cls)
+{
+
+ obj->ops = cls->ops;
+ cls->refs++;
+}
+
void
kobj_init(kobj_t obj, kobj_class_t cls)
{
@@ -331,13 +309,22 @@ kobj_init(kobj_t obj, kobj_class_t cls)
goto retry;
}
- obj->ops = cls->ops;
- cls->refs++;
+ kobj_init_common(obj, cls);
KOBJ_UNLOCK();
}
void
+kobj_init_static(kobj_t obj, kobj_class_t cls)
+{
+
+ KASSERT(kobj_mutex_inited == 0,
+ ("%s: only supported during early cycles", __func__));
+
+ kobj_init_common(obj, cls);
+}
+
+void
kobj_delete(kobj_t obj, struct malloc_type *mtype)
{
kobj_class_t cls = obj->ops->cls;
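
kobj_class_compile_static() and the new kobj_init_static() are only meant for the window before the kobj mutex exists, which the added KASSERTs now enforce. A hedged sketch of an early-boot user follows; the class, its empty method table, and the object layout are assumptions for illustration.

static kobj_method_t early_methods[] = {
	{ NULL, NULL }			/* method table terminator */
};

struct early_obj {
	KOBJ_FIELDS;			/* must come first in a kobj object */
};

DEFINE_CLASS_0(early, early_class, early_methods, sizeof(struct early_obj));

static struct kobj_ops	early_ops;	/* statically allocated ops cache */
static struct early_obj	early_obj;

static void
early_kobj_setup(void)
{

	/* Legal only before mutexes are available, per the new KASSERTs. */
	kobj_class_compile_static(&early_class, &early_ops);
	kobj_init_static((kobj_t)&early_obj, &early_class);
}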
diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c
index c3c87e44..a1982daf 100644
--- a/freebsd/sys/kern/subr_rman.c
+++ b/freebsd/sys/kern/subr_rman.c
@@ -140,6 +140,8 @@ rman_init(struct rman *rm)
mtx_init(&rman_mtx, "rman head", NULL, MTX_DEF);
}
+ if (rm->rm_start == 0 && rm->rm_end == 0)
+ rm->rm_end = ~0ul;
if (rm->rm_type == RMAN_UNINIT)
panic("rman_init");
if (rm->rm_type == RMAN_GAUGE)
@@ -164,6 +166,8 @@ rman_manage_region(struct rman *rm, u_long start, u_long end)
DPRINTF(("rman_manage_region: <%s> request: start %#lx, end %#lx\n",
rm->rm_descr, start, end));
+ if (start < rm->rm_start || end > rm->rm_end)
+ return EINVAL;
r = int_alloc_resource(M_NOWAIT);
if (r == NULL)
return ENOMEM;
@@ -270,6 +274,164 @@ rman_fini(struct rman *rm)
return 0;
}
+int
+rman_first_free_region(struct rman *rm, u_long *start, u_long *end)
+{
+ struct resource_i *r;
+
+ mtx_lock(rm->rm_mtx);
+ TAILQ_FOREACH(r, &rm->rm_list, r_link) {
+ if (!(r->r_flags & RF_ALLOCATED)) {
+ *start = r->r_start;
+ *end = r->r_end;
+ mtx_unlock(rm->rm_mtx);
+ return (0);
+ }
+ }
+ mtx_unlock(rm->rm_mtx);
+ return (ENOENT);
+}
+
+int
+rman_last_free_region(struct rman *rm, u_long *start, u_long *end)
+{
+ struct resource_i *r;
+
+ mtx_lock(rm->rm_mtx);
+ TAILQ_FOREACH_REVERSE(r, &rm->rm_list, resource_head, r_link) {
+ if (!(r->r_flags & RF_ALLOCATED)) {
+ *start = r->r_start;
+ *end = r->r_end;
+ mtx_unlock(rm->rm_mtx);
+ return (0);
+ }
+ }
+ mtx_unlock(rm->rm_mtx);
+ return (ENOENT);
+}
+
+/* Shrink or extend one or both ends of an allocated resource. */
+int
+rman_adjust_resource(struct resource *rr, u_long start, u_long end)
+{
+ struct resource_i *r, *s, *t, *new;
+ struct rman *rm;
+
+ /* Not supported for shared resources. */
+ r = rr->__r_i;
+ if (r->r_flags & (RF_TIMESHARE | RF_SHAREABLE))
+ return (EINVAL);
+
+ /*
+ * This does not support wholesale moving of a resource. At
+ * least part of the desired new range must overlap with the
+ * existing resource.
+ */
+ if (end < r->r_start || r->r_end < start)
+ return (EINVAL);
+
+ /*
+ * Find the two resource regions immediately adjacent to the
+ * allocated resource.
+ */
+ rm = r->r_rm;
+ mtx_lock(rm->rm_mtx);
+#ifdef INVARIANTS
+ TAILQ_FOREACH(s, &rm->rm_list, r_link) {
+ if (s == r)
+ break;
+ }
+ if (s == NULL)
+ panic("resource not in list");
+#endif
+ s = TAILQ_PREV(r, resource_head, r_link);
+ t = TAILQ_NEXT(r, r_link);
+ KASSERT(s == NULL || s->r_end + 1 == r->r_start,
+ ("prev resource mismatch"));
+ KASSERT(t == NULL || r->r_end + 1 == t->r_start,
+ ("next resource mismatch"));
+
+ /*
+ * See if the changes are permitted. Shrinking is always allowed,
+ * but growing requires sufficient room in the adjacent region.
+ */
+ if (start < r->r_start && (s == NULL || (s->r_flags & RF_ALLOCATED) ||
+ s->r_start > start)) {
+ mtx_unlock(rm->rm_mtx);
+ return (EBUSY);
+ }
+ if (end > r->r_end && (t == NULL || (t->r_flags & RF_ALLOCATED) ||
+ t->r_end < end)) {
+ mtx_unlock(rm->rm_mtx);
+ return (EBUSY);
+ }
+
+ /*
+ * While holding the lock, grow either end of the resource as
+ * needed and shrink either end if the shrinking does not require
+ * allocating a new resource. We can safely drop the lock and then
+ * insert a new range to handle the shrinking case afterwards.
+ */
+ if (start < r->r_start ||
+ (start > r->r_start && s != NULL && !(s->r_flags & RF_ALLOCATED))) {
+ KASSERT(s->r_flags == 0, ("prev is busy"));
+ r->r_start = start;
+ if (s->r_start == start) {
+ TAILQ_REMOVE(&rm->rm_list, s, r_link);
+ free(s, M_RMAN);
+ } else
+ s->r_end = start - 1;
+ }
+ if (end > r->r_end ||
+ (end < r->r_end && t != NULL && !(t->r_flags & RF_ALLOCATED))) {
+ KASSERT(t->r_flags == 0, ("next is busy"));
+ r->r_end = end;
+ if (t->r_end == end) {
+ TAILQ_REMOVE(&rm->rm_list, t, r_link);
+ free(t, M_RMAN);
+ } else
+ t->r_start = end + 1;
+ }
+ mtx_unlock(rm->rm_mtx);
+
+ /*
+ * Handle the shrinking cases that require allocating a new
+ * resource to hold the newly-free region. We have to recheck
+ * if we still need this new region after acquiring the lock.
+ */
+ if (start > r->r_start) {
+ new = int_alloc_resource(M_WAITOK);
+ new->r_start = r->r_start;
+ new->r_end = start - 1;
+ new->r_rm = rm;
+ mtx_lock(rm->rm_mtx);
+ r->r_start = start;
+ s = TAILQ_PREV(r, resource_head, r_link);
+ if (s != NULL && !(s->r_flags & RF_ALLOCATED)) {
+ s->r_end = start - 1;
+ free(new, M_RMAN);
+ } else
+ TAILQ_INSERT_BEFORE(r, new, r_link);
+ mtx_unlock(rm->rm_mtx);
+ }
+ if (end < r->r_end) {
+ new = int_alloc_resource(M_WAITOK);
+ new->r_start = end + 1;
+ new->r_end = r->r_end;
+ new->r_rm = rm;
+ mtx_lock(rm->rm_mtx);
+ r->r_end = end;
+ t = TAILQ_NEXT(r, r_link);
+ if (t != NULL && !(t->r_flags & RF_ALLOCATED)) {
+ t->r_start = end + 1;
+ free(new, M_RMAN);
+ } else
+ TAILQ_INSERT_AFTER(&rm->rm_list, r, new, r_link);
+ mtx_unlock(rm->rm_mtx);
+ }
+ return (0);
+}
+
struct resource *
rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end,
u_long count, u_long bound, u_int flags,
@@ -679,6 +841,7 @@ int_rman_release_resource(struct rman *rm, struct resource_i *r)
* without freeing anything.
*/
r->r_flags &= ~RF_ALLOCATED;
+ r->r_dev = NULL;
return 0;
}
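
A brief sketch of the new rman behaviour from a bus driver's perspective: a zero-initialized rman now covers the full [0, ~0ul] range, rman_manage_region() rejects regions outside [rm_start, rm_end], and rman_adjust_resource() lets an already-allocated window grow or shrink in place. The descriptor string and the address values below are assumptions.

static struct rman io_rman;

static int
mybridge_rman_init(void)
{
	int error;

	io_rman.rm_type = RMAN_ARRAY;
	io_rman.rm_descr = "hypothetical bridge I/O window";
	io_rman.rm_start = 0;		/* 0/0 now means the full range */
	io_rman.rm_end = 0;
	error = rman_init(&io_rman);
	if (error != 0)
		return (error);

	/* Must fall inside [rm_start, rm_end], or EINVAL is returned. */
	return (rman_manage_region(&io_rman, 0x1000, 0x1fff));
}

/* Shrink a previously allocated, non-shared window r to its lower half. */
static int
mybridge_shrink(struct resource *r)
{

	return (rman_adjust_resource(r, rman_get_start(r),
	    rman_get_start(r) + (rman_get_size(r) / 2) - 1));
}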
diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c
index 165d17a3..c57392d6 100644
--- a/freebsd/sys/kern/subr_sbuf.c
+++ b/freebsd/sys/kern/subr_sbuf.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#ifdef _KERNEL
#include <sys/ctype.h>
+#include <rtems/bsd/sys/errno.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/systm.h>
@@ -42,6 +43,7 @@ __FBSDID("$FreeBSD$");
#include <machine/stdarg.h>
#else /* _KERNEL */
#include <ctype.h>
+#include <errno.h>
#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
@@ -50,6 +52,12 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
+struct sbuf_drain {
+ sbuf_drain_func *s_func; /* drain function */
+ void *s_arg; /* user-supplied drain argument */
+ int s_error; /* current error code */
+};
+
#ifdef _KERNEL
static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK)
@@ -58,7 +66,6 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers");
#define KASSERT(e, m)
#define SBMALLOC(size) malloc(size)
#define SBFREE(buf) free(buf)
-#define min(x,y) MIN(x,y)
#endif /* _KERNEL */
/*
@@ -118,18 +125,24 @@ _assert_sbuf_state(const char *fun, struct sbuf *s, int state)
#endif /* _KERNEL && INVARIANTS */
+#ifdef CTASSERT
+CTASSERT(powerof2(SBUF_MAXEXTENDSIZE));
+CTASSERT(powerof2(SBUF_MAXEXTENDINCR));
+#endif
+
static int
sbuf_extendsize(int size)
{
int newsize;
- newsize = SBUF_MINEXTENDSIZE;
- while (newsize < size) {
- if (newsize < (int)SBUF_MAXEXTENDSIZE)
+ if (size < (int)SBUF_MAXEXTENDSIZE) {
+ newsize = SBUF_MINEXTENDSIZE;
+ while (newsize < size)
newsize *= 2;
- else
- newsize += SBUF_MAXEXTENDINCR;
+ } else {
+ newsize = roundup2(size, SBUF_MAXEXTENDINCR);
}
+ KASSERT(newsize >= size, ("%s: %d < %d\n", __func__, newsize, size));
return (newsize);
}
@@ -186,11 +199,11 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags)
s->s_flags = flags;
}
s->s_size = length;
- if (buf) {
+ if (buf != NULL) {
s->s_buf = buf;
return (s);
}
- if (flags & SBUF_AUTOEXTEND)
+ if ((flags & SBUF_AUTOEXTEND) != 0)
s->s_size = sbuf_extendsize(s->s_size);
s->s_buf = SBMALLOC(s->s_size);
if (s->s_buf == NULL) {
@@ -243,6 +256,8 @@ sbuf_clear(struct sbuf *s)
SBUF_CLEARFLAG(s, SBUF_FINISHED);
SBUF_CLEARFLAG(s, SBUF_OVERFLOWED);
+ if (s->s_drain != NULL)
+ s->s_drain->s_error = 0;
s->s_len = 0;
}
@@ -269,27 +284,125 @@ sbuf_setpos(struct sbuf *s, int pos)
}
/*
+ * Set up a drain function and argument on an sbuf to flush data to
+ * when the sbuf buffer overflows.
+ */
+void
+sbuf_set_drain(struct sbuf *s, sbuf_drain_func *func, void *ctx)
+{
+
+ assert_sbuf_state(s, 0);
+ assert_sbuf_integrity(s);
+ KASSERT((s->s_drain != NULL && func == s->s_drain->s_func) ||
+ s->s_len == 0,
+ ("Cannot change drain to %p on non-empty sbuf %p", func, s));
+ if (func == NULL) {
+ SBFREE(s->s_drain);
+ s->s_drain = NULL;
+ return;
+ }
+ if (s->s_drain == NULL) {
+ s->s_drain = SBMALLOC(sizeof(*s->s_drain));
+ if (s->s_drain == NULL)
+ return;
+ }
+ s->s_drain->s_func = func;
+ s->s_drain->s_arg = ctx;
+ s->s_drain->s_error = 0;
+}
+
+/*
+ * Call the drain and process the return.
+ */
+static int
+sbuf_drain(struct sbuf *s)
+{
+ int len;
+
+ KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s));
+ len = s->s_drain->s_func(s->s_drain->s_arg, s->s_buf, s->s_len);
+ if (len < 0) {
+ s->s_drain->s_error = -len;
+ SBUF_SETFLAG(s, SBUF_OVERFLOWED);
+ return (s->s_drain->s_error);
+ }
+ KASSERT(len > 0 && len <= s->s_len,
+ ("Bad drain amount %d for sbuf %p", len, s));
+ s->s_len -= len;
+ /*
+ * Fast path for the expected case where all the data was
+ * drained.
+ */
+ if (s->s_len == 0)
+ return (0);
+ /*
+ * Move the remaining characters to the beginning of the
+ * string.
+ */
+ memmove(s->s_buf, s->s_buf + len, s->s_len);
+ return (0);
+}
+
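The drain machinery above can be exercised with a minimal sketch like the following (userland-flavoured; everything except the sbuf(9) calls themselves is illustrative): a callback flushes the fixed 128-byte buffer to a stdio stream whenever it fills, and sbuf_finish() drains whatever is left.

#include <errno.h>
#include <stdio.h>
#include <sys/sbuf.h>

/*
 * Drain callback: consume up to 'len' bytes and return the number
 * consumed, or a negative errno, which marks the sbuf as overflowed.
 */
static int
file_drain(void *arg, const char *data, int len)
{
	size_t n = fwrite(data, 1, (size_t)len, arg);

	return (n > 0 ? (int)n : -EIO);
}

static void
report(FILE *fp)
{
	struct sbuf *sb = sbuf_new(NULL, NULL, 128, SBUF_FIXEDLEN);

	sbuf_set_drain(sb, file_drain, fp);	/* flush to fp on overflow */
	sbuf_printf(sb, "hello, %s\n", "world");
	sbuf_finish(sb);			/* drain the remainder */
	sbuf_delete(sb);
}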
+/*
+ * Append a byte to an sbuf. This is the core function for appending
+ * to an sbuf and is the main place that deals with extending the
+ * buffer and marking overflow.
+ */
+static void
+sbuf_put_byte(int c, struct sbuf *s)
+{
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+
+ if (SBUF_HASOVERFLOWED(s))
+ return;
+ if (SBUF_FREESPACE(s) <= 0) {
+ /*
+ * If there is a drain, use it, otherwise extend the
+ * buffer.
+ */
+ if (s->s_drain != NULL)
+ (void)sbuf_drain(s);
+ else if (sbuf_extend(s, 1) < 0)
+ SBUF_SETFLAG(s, SBUF_OVERFLOWED);
+ if (SBUF_HASOVERFLOWED(s))
+ return;
+ }
+ s->s_buf[s->s_len++] = c;
+}
+
+/*
+ * Append a non-NUL character to an sbuf. This prototype signature is
+ * suitable for use with kvprintf(9).
+ */
+static void
+sbuf_putc_func(int c, void *arg)
+{
+
+ if (c != '\0')
+ sbuf_put_byte(c, arg);
+}
+
+/*
* Append a byte string to an sbuf.
*/
int
sbuf_bcat(struct sbuf *s, const void *buf, size_t len)
{
const char *str = buf;
+ const char *end = str + len;
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
if (SBUF_HASOVERFLOWED(s))
return (-1);
- for (; len; len--) {
- if (!SBUF_HASROOM(s) && sbuf_extend(s, len) < 0)
- break;
- s->s_buf[s->s_len++] = *str++;
- }
- if (len) {
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- return (-1);
- }
+ for (; str < end; str++) {
+ sbuf_put_byte(*str, s);
+ if (SBUF_HASOVERFLOWED(s))
+ return (-1);
+ }
return (0);
}
@@ -303,6 +416,8 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
+ KASSERT(s->s_drain == NULL,
+ ("Nonsensical copyin to sbuf %p with a drain", s));
if (SBUF_HASOVERFLOWED(s))
return (-1);
@@ -310,7 +425,8 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len)
return (0);
if (len > SBUF_FREESPACE(s)) {
sbuf_extend(s, len - SBUF_FREESPACE(s));
- len = min(len, SBUF_FREESPACE(s));
+ if (SBUF_FREESPACE(s) < len)
+ len = SBUF_FREESPACE(s);
}
if (copyin(uaddr, s->s_buf + s->s_len, len) != 0)
return (-1);
@@ -347,14 +463,10 @@ sbuf_cat(struct sbuf *s, const char *str)
if (SBUF_HASOVERFLOWED(s))
return (-1);
- while (*str) {
- if (!SBUF_HASROOM(s) && sbuf_extend(s, strlen(str)) < 0)
- break;
- s->s_buf[s->s_len++] = *str++;
- }
- if (*str) {
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- return (-1);
+ while (*str != '\0') {
+ sbuf_put_byte(*str++, s);
+ if (SBUF_HASOVERFLOWED(s))
+ return (-1);
}
return (0);
}
@@ -370,6 +482,8 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
+ KASSERT(s->s_drain == NULL,
+ ("Nonsensical copyin to sbuf %p with a drain", s));
if (SBUF_HASOVERFLOWED(s))
return (-1);
@@ -378,7 +492,8 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len)
len = SBUF_FREESPACE(s); /* XXX return 0? */
if (len > SBUF_FREESPACE(s)) {
sbuf_extend(s, len);
- len = min(len, SBUF_FREESPACE(s));
+ if (SBUF_FREESPACE(s) < len)
+ len = SBUF_FREESPACE(s);
}
switch (copyinstr(uaddr, s->s_buf + s->s_len, len + 1, &done)) {
case ENAMETOOLONG:
@@ -412,11 +527,28 @@ sbuf_cpy(struct sbuf *s, const char *str)
/*
* Format the given argument list and append the resulting string to an sbuf.
*/
+#if defined(_KERNEL) && !defined(__rtems__)
+int
+sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
+{
+
+ assert_sbuf_integrity(s);
+ assert_sbuf_state(s, 0);
+
+ KASSERT(fmt != NULL,
+ ("%s called with a NULL format string", __func__));
+
+ (void)kvprintf(fmt, sbuf_putc_func, s, 10, ap);
+ if (SBUF_HASOVERFLOWED(s))
+ return (-1);
+ return (0);
+}
+#else /* !_KERNEL */
int
sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
{
va_list ap_copy;
- int len;
+ int error, len;
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
@@ -427,13 +559,32 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
if (SBUF_HASOVERFLOWED(s))
return (-1);
+ /*
+	 * For the moment, there is no way to get vsnprintf(3) to hand
+	 * back a character at a time so that everything could be pushed
+	 * into sbuf_putc_func() as was done for the kernel.
+	 *
+	 * In userspace, while drains are useful, there is generally no
+	 * problem with calling malloc(3) when we run out of space.  So
+ * expand a userland sbuf if there is not enough room for the
+ * data produced by sbuf_[v]printf(3).
+ */
+
+ error = 0;
do {
va_copy(ap_copy, ap);
len = vsnprintf(&s->s_buf[s->s_len], SBUF_FREESPACE(s) + 1,
fmt, ap_copy);
va_end(ap_copy);
- } while (len > SBUF_FREESPACE(s) &&
- sbuf_extend(s, len - SBUF_FREESPACE(s)) == 0);
+
+ if (SBUF_FREESPACE(s) >= len)
+ break;
+ /* Cannot print with the current available space. */
+ if (s->s_drain != NULL && s->s_len > 0)
+ error = sbuf_drain(s);
+ else
+ error = sbuf_extend(s, len - SBUF_FREESPACE(s));
+ } while (error == 0);
/*
* s->s_len is the length of the string, without the terminating nul.
@@ -442,9 +593,11 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
* terminating nul.
*
* vsnprintf() returns the amount that would have been copied,
- * given sufficient space, hence the min() calculation below.
+ * given sufficient space, so don't over-increment s_len.
*/
- s->s_len += min(len, SBUF_FREESPACE(s));
+ if (SBUF_FREESPACE(s) < len)
+ len = SBUF_FREESPACE(s);
+ s->s_len += len;
if (!SBUF_HASROOM(s) && !SBUF_CANEXTEND(s))
SBUF_SETFLAG(s, SBUF_OVERFLOWED);
@@ -455,6 +608,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap)
return (-1);
return (0);
}
+#endif /* _KERNEL */
/*
* Format the given arguments and append the resulting string to an sbuf.
@@ -478,17 +632,9 @@ int
sbuf_putc(struct sbuf *s, int c)
{
- assert_sbuf_integrity(s);
- assert_sbuf_state(s, 0);
-
+ sbuf_putc_func(c, s);
if (SBUF_HASOVERFLOWED(s))
return (-1);
- if (!SBUF_HASROOM(s) && sbuf_extend(s, 1) < 0) {
- SBUF_SETFLAG(s, SBUF_OVERFLOWED);
- return (-1);
- }
- if (c != '\0')
- s->s_buf[s->s_len++] = c;
return (0);
}
@@ -501,11 +647,13 @@ sbuf_trim(struct sbuf *s)
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
+ KASSERT(s->s_drain == NULL,
+ ("%s makes no sense on sbuf %p with drain", __func__, s));
if (SBUF_HASOVERFLOWED(s))
return (-1);
- while (s->s_len && isspace(s->s_buf[s->s_len-1]))
+ while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1]))
--s->s_len;
return (0);
@@ -524,16 +672,32 @@ sbuf_overflowed(struct sbuf *s)
/*
* Finish off an sbuf.
*/
-void
+int
sbuf_finish(struct sbuf *s)
{
+ int error = 0;
assert_sbuf_integrity(s);
assert_sbuf_state(s, 0);
+ if (s->s_drain != NULL) {
+ error = s->s_drain->s_error;
+ while (s->s_len > 0 && error == 0)
+ error = sbuf_drain(s);
+ } else if (SBUF_HASOVERFLOWED(s))
+ error = ENOMEM;
s->s_buf[s->s_len] = '\0';
SBUF_CLEARFLAG(s, SBUF_OVERFLOWED);
SBUF_SETFLAG(s, SBUF_FINISHED);
+#ifdef _KERNEL
+ return (error);
+#else
+ /*XXX*/if (error) {
+ errno = error;
+ return (-1);
+ } else
+ return (0);
+#endif
}
/*
@@ -545,6 +709,8 @@ sbuf_data(struct sbuf *s)
assert_sbuf_integrity(s);
assert_sbuf_state(s, SBUF_FINISHED);
+ KASSERT(s->s_drain == NULL,
+ ("%s makes no sense on sbuf %p with drain", __func__, s));
return (s->s_buf);
}
@@ -558,6 +724,8 @@ sbuf_len(struct sbuf *s)
assert_sbuf_integrity(s);
/* don't care if it's finished or not */
+ KASSERT(s->s_drain == NULL,
+ ("%s makes no sense on sbuf %p with drain", __func__, s));
if (SBUF_HASOVERFLOWED(s))
return (-1);
@@ -577,6 +745,8 @@ sbuf_delete(struct sbuf *s)
if (SBUF_ISDYNAMIC(s))
SBFREE(s->s_buf);
+ if (s->s_drain != NULL)
+ SBFREE(s->s_drain);
isdyn = SBUF_ISDYNSTRUCT(s);
bzero(s, sizeof(*s));
if (isdyn)
diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c
index bbfe69e4..d2072082 100644
--- a/freebsd/sys/kern/subr_taskqueue.c
+++ b/freebsd/sys/kern/subr_taskqueue.c
@@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/interrupt.h>
#include <sys/kernel.h>
#include <sys/kthread.h>
+#include <sys/limits.h>
#include <rtems/bsd/sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
@@ -57,7 +58,6 @@ struct taskqueue_busy {
struct taskqueue {
STAILQ_HEAD(, task) tq_queue;
- const char *tq_name;
taskqueue_enqueue_fn tq_enqueue;
void *tq_context;
TAILQ_HEAD(, taskqueue_busy) tq_active;
@@ -110,7 +110,7 @@ TQ_SLEEP(struct taskqueue *tq, void *p, struct mtx *m, int pri, const char *wm,
}
static struct taskqueue *
-_taskqueue_create(const char *name, int mflags,
+_taskqueue_create(const char *name __unused, int mflags,
taskqueue_enqueue_fn enqueue, void *context,
int mtxflags, const char *mtxname)
{
@@ -122,7 +122,6 @@ _taskqueue_create(const char *name, int mflags,
STAILQ_INIT(&queue->tq_queue);
TAILQ_INIT(&queue->tq_active);
- queue->tq_name = name;
queue->tq_enqueue = enqueue;
queue->tq_context = context;
#ifndef __rtems__
@@ -186,7 +185,8 @@ taskqueue_enqueue(struct taskqueue *queue, struct task *task)
* Count multiple enqueues.
*/
if (task->ta_pending) {
- task->ta_pending++;
+ if (task->ta_pending < USHRT_MAX)
+ task->ta_pending++;
TQ_UNLOCK(queue);
return 0;
}
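For reference, the double-enqueue case that the clamp above guards against looks roughly like the hedged sketch below; the handler and task names are illustrative, only the taskqueue(9) calls are taken as given.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/taskqueue.h>

static void
my_handler(void *arg, int pending)
{
	/* 'pending' reports how many enqueues were coalesced. */
	printf("task ran, %d enqueue(s) pending\n", pending);
}

static struct task my_task;

static void
kick(void)
{
	TASK_INIT(&my_task, 0, my_handler, NULL);
	taskqueue_enqueue(taskqueue_thread, &my_task);
	/* A second enqueue before the task runs only bumps ta_pending. */
	taskqueue_enqueue(taskqueue_thread, &my_task);
}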
@@ -485,7 +485,7 @@ taskqueue_fast_run(void *dummy)
}
TASKQUEUE_FAST_DEFINE(fast, taskqueue_fast_enqueue, NULL,
- swi_add(NULL, "Fast task queue", taskqueue_fast_run, NULL,
+ swi_add(NULL, "fast taskq", taskqueue_fast_run, NULL,
SWI_TQ_FAST, INTR_MPSAFE, &taskqueue_fast_ih));
int
diff --git a/freebsd/sys/kern/subr_unit.c b/freebsd/sys/kern/subr_unit.c
index 1719ede8..386a470b 100644
--- a/freebsd/sys/kern/subr_unit.c
+++ b/freebsd/sys/kern/subr_unit.c
@@ -43,7 +43,7 @@
*
* If a mutex is not provided when the unit number space is created, a
* default global mutex is used. The advantage to passing a mutex in, is
- * that the the alloc_unrl() function can be called with the mutex already
+ * that the alloc_unrl() function can be called with the mutex already
* held (it will not be released by alloc_unrl()).
*
* The allocation function alloc_unr{l}() never sleeps (but it may block on
@@ -54,7 +54,7 @@
*
* A userland test program is included.
*
- * Memory usage is a very complex function of the the exact allocation
+ * Memory usage is a very complex function of the exact allocation
* pattern, but always very compact:
* * For the very typical case where a single unbroken run of unit
* numbers are allocated 44 bytes are used on i386.
@@ -65,7 +65,7 @@
* in the usermode test program included, the worst case usage
* was 798 bytes on i386 for 5000 allocated and 5000 free units.
* * The worst case is where every other unit number is allocated and
- * the the rest are free. In that case 44 + N/4 bytes are used where
+ * the rest are free. In that case 44 + N/4 bytes are used where
* N is the number of the highest unit allocated.
*/
@@ -630,6 +630,132 @@ alloc_unr(struct unrhdr *uh)
return (i);
}
+static int
+alloc_unr_specificl(struct unrhdr *uh, u_int item, void **p1, void **p2)
+{
+ struct unr *up, *upn;
+ struct unrb *ub;
+ u_int i, last, tl;
+
+ mtx_assert(uh->mtx, MA_OWNED);
+
+ if (item < uh->low + uh->first || item > uh->high)
+ return (-1);
+
+ up = TAILQ_FIRST(&uh->head);
+ /* Ideal split. */
+ if (up == NULL && item - uh->low == uh->first) {
+ uh->first++;
+ uh->last--;
+ uh->busy++;
+ check_unrhdr(uh, __LINE__);
+ return (item);
+ }
+
+ i = item - uh->low - uh->first;
+
+ if (up == NULL) {
+ up = new_unr(uh, p1, p2);
+ up->ptr = NULL;
+ up->len = i;
+ TAILQ_INSERT_TAIL(&uh->head, up, list);
+ up = new_unr(uh, p1, p2);
+ up->ptr = uh;
+ up->len = 1;
+ TAILQ_INSERT_TAIL(&uh->head, up, list);
+ uh->last = uh->high - uh->low - i;
+ uh->busy++;
+ check_unrhdr(uh, __LINE__);
+ return (item);
+ } else {
+ /* Find the item which contains the unit we want to allocate. */
+ TAILQ_FOREACH(up, &uh->head, list) {
+ if (up->len > i)
+ break;
+ i -= up->len;
+ }
+ }
+
+ if (up == NULL) {
+ if (i > 0) {
+ up = new_unr(uh, p1, p2);
+ up->ptr = NULL;
+ up->len = i;
+ TAILQ_INSERT_TAIL(&uh->head, up, list);
+ }
+ up = new_unr(uh, p1, p2);
+ up->ptr = uh;
+ up->len = 1;
+ TAILQ_INSERT_TAIL(&uh->head, up, list);
+ goto done;
+ }
+
+ if (is_bitmap(uh, up)) {
+ ub = up->ptr;
+ if (bit_test(ub->map, i) == 0) {
+ bit_set(ub->map, i);
+ ub->busy++;
+ goto done;
+ } else
+ return (-1);
+ } else if (up->ptr == uh)
+ return (-1);
+
+ KASSERT(up->ptr == NULL,
+ ("alloc_unr_specificl: up->ptr != NULL (up=%p)", up));
+
+ /* Split off the tail end, if any. */
+ tl = up->len - (1 + i);
+ if (tl > 0) {
+ upn = new_unr(uh, p1, p2);
+ upn->ptr = NULL;
+ upn->len = tl;
+ TAILQ_INSERT_AFTER(&uh->head, up, upn, list);
+ }
+
+ /* Split off head end, if any */
+ if (i > 0) {
+ upn = new_unr(uh, p1, p2);
+ upn->len = i;
+ upn->ptr = NULL;
+ TAILQ_INSERT_BEFORE(up, upn, list);
+ }
+ up->len = 1;
+ up->ptr = uh;
+
+done:
+ last = uh->high - uh->low - (item - uh->low);
+ if (uh->last > last)
+ uh->last = last;
+ uh->busy++;
+ collapse_unr(uh, up);
+ check_unrhdr(uh, __LINE__);
+ return (item);
+}
+
+int
+alloc_unr_specific(struct unrhdr *uh, u_int item)
+{
+ void *p1, *p2;
+ int i;
+
+ WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, "alloc_unr_specific");
+
+ p1 = Malloc(sizeof(struct unr));
+ p2 = Malloc(sizeof(struct unr));
+
+ mtx_lock(uh->mtx);
+ i = alloc_unr_specificl(uh, item, &p1, &p2);
+ mtx_unlock(uh->mtx);
+
+ if (p1 != NULL)
+ Free(p1);
+ if (p2 != NULL)
+ Free(p2);
+
+ return (i);
+}
+
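A hedged usage sketch for the new entry point; the unit range is illustrative, and the calls follow the existing subr_unit.c API.

#include <sys/param.h>
#include <sys/systm.h>

static void
unit_demo(void)
{
	struct unrhdr *uh;
	int unit;

	uh = new_unrhdr(0, 1023, NULL);		/* units 0..1023, default mutex */
	unit = alloc_unr_specific(uh, 5);	/* returns 5, or -1 if taken */
	if (unit != -1)
		free_unr(uh, unit);
	delete_unrhdr(uh);
}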
/*
* Free a unr.
*
@@ -812,6 +938,42 @@ print_unrhdr(struct unrhdr *uh)
}
}
+static void
+test_alloc_unr(struct unrhdr *uh, u_int i, char a[])
+{
+ int j;
+
+ if (a[i]) {
+ printf("F %u\n", i);
+ free_unr(uh, i);
+ a[i] = 0;
+ } else {
+ no_alloc = 1;
+ j = alloc_unr(uh);
+ if (j != -1) {
+ a[j] = 1;
+ printf("A %d\n", j);
+ }
+ no_alloc = 0;
+ }
+}
+
+static void
+test_alloc_unr_specific(struct unrhdr *uh, u_int i, char a[])
+{
+ int j;
+
+ j = alloc_unr_specific(uh, i);
+ if (j == -1) {
+ printf("F %u\n", i);
+ a[i] = 0;
+ free_unr(uh, i);
+ } else {
+ a[i] = 1;
+ printf("A %d\n", j);
+ }
+}
+
/* Number of unrs to test */
#define NN 10000
@@ -827,6 +989,7 @@ main(int argc __unused, const char **argv __unused)
print_unrhdr(uh);
memset(a, 0, sizeof a);
+ srandomdev();
fprintf(stderr, "sizeof(struct unr) %zu\n", sizeof(struct unr));
fprintf(stderr, "sizeof(struct unrb) %zu\n", sizeof(struct unrb));
@@ -840,19 +1003,11 @@ main(int argc __unused, const char **argv __unused)
if (a[i] && (j & 1))
continue;
#endif
- if (a[i]) {
- printf("F %u\n", i);
- free_unr(uh, i);
- a[i] = 0;
- } else {
- no_alloc = 1;
- i = alloc_unr(uh);
- if (i != -1) {
- a[i] = 1;
- printf("A %u\n", i);
- }
- no_alloc = 0;
- }
+ if ((random() & 1) != 0)
+ test_alloc_unr(uh, i, a);
+ else
+ test_alloc_unr_specific(uh, i, a);
+
if (1) /* XXX: change this for detailed debug printout */
print_unrhdr(uh);
check_unrhdr(uh, __LINE__);
diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c
index b0201d68..deda7afd 100644
--- a/freebsd/sys/kern/sys_generic.c
+++ b/freebsd/sys/kern/sys_generic.c
@@ -843,6 +843,54 @@ select(struct thread *td, struct select_args *uap)
}
#endif /* __rtems__ */
+/*
+ * In the unlikely case when the user specified an n greater than the last
+ * open file descriptor, check that no bits are set after the last
+ * valid fd. We must return EBADF if any is set.
+ *
+ * There are applications that rely on this behaviour.
+ *
+ * nd is fd_lastfile + 1.
+ */
+static int
+select_check_badfd(fd_set *fd_in, int nd, int ndu, int abi_nfdbits)
+{
+ char *addr, *oaddr;
+ int b, i, res;
+ uint8_t bits;
+
+ if (nd >= ndu || fd_in == NULL)
+ return (0);
+
+ oaddr = NULL;
+ bits = 0; /* silence gcc */
+ for (i = nd; i < ndu; i++) {
+ b = i / NBBY;
+#if BYTE_ORDER == LITTLE_ENDIAN
+ addr = (char *)fd_in + b;
+#else
+ addr = (char *)fd_in;
+ if (abi_nfdbits == NFDBITS) {
+ addr += rounddown(b, sizeof(fd_mask)) +
+ sizeof(fd_mask) - 1 - b % sizeof(fd_mask);
+ } else {
+ addr += rounddown(b, sizeof(uint32_t)) +
+ sizeof(uint32_t) - 1 - b % sizeof(uint32_t);
+ }
+#endif
+ if (addr != oaddr) {
+ res = fubyte(addr);
+ if (res == -1)
+ return (EFAULT);
+ oaddr = addr;
+ bits = res;
+ }
+ if ((bits & (1 << (i % NBBY))) != 0)
+ return (EBADF);
+ }
+ return (0);
+}
+
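The behaviour this check preserves is visible from userland: in the hedged sketch below, a bit set for a descriptor that was never opened, with nfds large enough to cover it, makes select(2) fail with EBADF.

#include <sys/select.h>
#include <errno.h>
#include <stdio.h>

int
main(void)
{
	fd_set rs;
	struct timeval tv = { 0, 0 };

	FD_ZERO(&rs);
	FD_SET(200, &rs);	/* almost certainly not an open descriptor */
	if (select(201, &rs, NULL, NULL, &tv) == -1 && errno == EBADF)
		printf("EBADF, as expected\n");
	return (0);
}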
int
kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
fd_set *fd_ex, struct timeval *tvp, int abi_nfdbits)
@@ -857,20 +905,30 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
fd_mask s_selbits[howmany(2048, NFDBITS)];
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
struct timeval atv, rtv, ttv;
- int error, timo;
+ int error, lf, ndu, timo;
u_int nbufbytes, ncpbytes, ncpubytes, nfdbits;
if (nd < 0)
return (EINVAL);
+ ndu = nd;
#ifndef __rtems__
- fdp = td->td_proc->p_fd;
- if (nd > fdp->fd_lastfile + 1)
- nd = fdp->fd_lastfile + 1;
+ lf = fdp->fd_lastfile;
#else /* __rtems__ */
(void) fdp;
- if (nd > rtems_libio_number_iops)
- nd = rtems_libio_number_iops;
+ lf = rtems_libio_number_iops;
#endif /* __rtems__ */
+ if (nd > lf + 1)
+ nd = lf + 1;
+
+ error = select_check_badfd(fd_in, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
+ error = select_check_badfd(fd_ou, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
+ error = select_check_badfd(fd_ex, nd, ndu, abi_nfdbits);
+ if (error != 0)
+ return (error);
/*
* Allocate just enough bits for the non-null fd_sets. Use the
@@ -1204,7 +1262,7 @@ rtems_bsd_poll(td, uap)
struct pollfd *bits;
struct pollfd smallbits[32];
struct timeval atv, rtv, ttv;
- int error = 0, timo;
+ int error, timo;
u_int nfds;
size_t ni;
@@ -1549,6 +1607,23 @@ selfdfree(struct seltd *stp, struct selfd *sfp)
uma_zfree(selfd_zone, sfp);
}
+/* Drain the waiters tied to all the selfd belonging to the specified selinfo. */
+void
+seldrain(sip)
+ struct selinfo *sip;
+{
+
+ /*
+	 * This feature is already provided by doselwakeup(); thus it is
+	 * enough to call it here.
+	 * Eventually, the context should take care to avoid races
+	 * between a thread calling select()/poll() and the file
+	 * descriptor being detached but, again, the races are just the
+	 * same as for selwakeup().
+ */
+ doselwakeup(sip, -1);
+}
+
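A hedged sketch of the intended caller (the softc and field names are hypothetical): a driver's detach path wakes any threads still sleeping in select()/poll() on its selinfo before the backing storage disappears.

#include <sys/param.h>
#include <sys/selinfo.h>
#include <sys/event.h>

struct mydev_softc {
	struct selinfo	sc_rsel;
	/* ... */
};

static void
mydev_detach_select(struct mydev_softc *sc)
{
	seldrain(&sc->sc_rsel);			/* wake select()/poll() waiters */
	knlist_destroy(&sc->sc_rsel.si_note);	/* then tear down the note list */
}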
/*
* Record a select request.
*/
diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c
index 6bb4416e..e86698fa 100644
--- a/freebsd/sys/kern/sys_socket.c
+++ b/freebsd/sys/kern/sys_socket.c
@@ -87,9 +87,7 @@ soo_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
if (error)
return (error);
#endif
- CURVNET_SET(so->so_vnet);
error = soreceive(so, 0, uio, 0, 0, 0);
- CURVNET_RESTORE();
return (error);
}
#ifdef __rtems__
@@ -210,7 +208,6 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
struct socket *so = fp->f_data;
int error = 0;
- CURVNET_SET(so->so_vnet);
switch (cmd) {
case FIONBIO:
SOCK_LOCK(so);
@@ -297,14 +294,18 @@ soo_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
*/
if (IOCGROUP(cmd) == 'i')
error = ifioctl(so, cmd, data, td);
- else if (IOCGROUP(cmd) == 'r')
+ else if (IOCGROUP(cmd) == 'r') {
+ CURVNET_SET(so->so_vnet);
error = rtioctl_fib(cmd, data, so->so_fibnum);
- else
+ CURVNET_RESTORE();
+ } else {
+ CURVNET_SET(so->so_vnet);
error = ((*so->so_proto->pr_usrreqs->pru_control)
(so, cmd, data, 0, td));
+ CURVNET_RESTORE();
+ }
break;
}
- CURVNET_RESTORE();
return (error);
}
#ifdef __rtems__
diff --git a/freebsd/sys/kern/uipc_domain.c b/freebsd/sys/kern/uipc_domain.c
index 1b2be102..7a70e246 100644
--- a/freebsd/sys/kern/uipc_domain.c
+++ b/freebsd/sys/kern/uipc_domain.c
@@ -222,7 +222,7 @@ domain_add(void *data)
domains = dp;
KASSERT(domain_init_status >= 1,
- ("attempt to net_add_domain(%s) before domaininit()",
+ ("attempt to domain_add(%s) before domaininit()",
dp->dom_name));
#ifndef INVARIANTS
if (domain_init_status < 1)
diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c
index 43255aab..f3b7c10d 100644
--- a/freebsd/sys/kern/uipc_mbuf.c
+++ b/freebsd/sys/kern/uipc_mbuf.c
@@ -282,7 +282,7 @@ mb_free_ext(struct mbuf *m)
}
/*
- * Attach the the cluster from *m to *n, set up m_ext in *n
+ * Attach the cluster from *m to *n, set up m_ext in *n
* and bump the refcount of the cluster.
*/
static void
@@ -1412,6 +1412,11 @@ m_print(const struct mbuf *m, int maxlen)
int pdata;
const struct mbuf *m2;
+ if (m == NULL) {
+ printf("mbuf: %p\n", m);
+ return;
+ }
+
if (m->m_flags & M_PKTHDR)
len = m->m_pkthdr.len;
else
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index 48195f31..1ee7a831 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -534,9 +534,6 @@ sbappendstream_locked(struct sockbuf *sb, struct mbuf *m)
SBLASTMBUFCHK(sb);
- /* Remove all packet headers and mbuf tags to get a pure data chain. */
- m_demote(m, 1);
-
sbcompress(sb, m, sb->sb_mbtail);
sb->sb_lastrecord = sb->sb_mb;
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index 473c3df0..5b3b9eef 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -94,6 +94,12 @@
* from a listen queue to a file descriptor, in order to prevent garbage
* collection of the socket at an untimely moment. For a number of reasons,
* these interfaces are not preferred, and should be avoided.
+ *
+ * NOTE: With regard to VNETs, the general rule is that callers do not set
+ * curvnet. Exceptions to this rule include soabort(), sodisconnect(),
+ * sofree() (and with that sorele(), sotryfree()), as well as sonewconn()
+ * and sorflush(), which are usually called from a pre-set VNET context.
+ * sopoll() currently does not need a VNET context to be set.
*/
#include <sys/cdefs.h>
@@ -168,6 +174,10 @@ int maxsockets;
MALLOC_DEFINE(M_SONAME, "soname", "socket name");
MALLOC_DEFINE(M_PCB, "pcb", "protocol control block");
+#define VNET_SO_ASSERT(so) \
+ VNET_ASSERT(curvnet != NULL, \
+ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so)));
+
static int somaxconn = SOMAXCONN;
static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS);
/* XXX: we dont have SYSCTL_USHORT */
@@ -294,6 +304,8 @@ soalloc(struct vnet *vnet)
so->so_gencnt = ++so_gencnt;
++numopensockets;
#ifdef VIMAGE
+ VNET_ASSERT(vnet != NULL, ("%s:%d vnet is NULL, so=%p",
+ __func__, __LINE__, so));
vnet->vnet_sockcnt++;
so->so_vnet = vnet;
#endif
@@ -317,6 +329,8 @@ sodealloc(struct socket *so)
so->so_gencnt = ++so_gencnt;
--numopensockets; /* Could be below, but faster here. */
#ifdef VIMAGE
+ VNET_ASSERT(so->so_vnet != NULL, ("%s:%d so_vnet is NULL, so=%p",
+ __func__, __LINE__, so));
so->so_vnet->vnet_sockcnt--;
#endif
mtx_unlock(&so_global_mtx);
@@ -377,6 +391,7 @@ socreate(int dom, struct socket **aso, int type, int proto,
so->so_type = type;
so->so_cred = crhold(cred);
if ((prp->pr_domain->dom_family == PF_INET) ||
+ (prp->pr_domain->dom_family == PF_INET6) ||
(prp->pr_domain->dom_family == PF_ROUTE))
#ifndef __rtems__
so->so_fibnum = td->td_proc->p_fibnum;
@@ -440,7 +455,8 @@ sonewconn(struct socket *head, int connstatus)
if (over)
#endif
return (NULL);
- VNET_ASSERT(head->so_vnet);
+ VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p",
+ __func__, __LINE__, head));
so = soalloc(head->so_vnet);
if (so == NULL)
return (NULL);
@@ -459,6 +475,7 @@ sonewconn(struct socket *head, int connstatus)
#endif
knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv));
knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd));
+ VNET_SO_ASSERT(head);
if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) ||
(*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) {
sodealloc(so);
@@ -533,8 +550,12 @@ sobind(struct socket *so, struct sockaddr *nam, struct thread *td)
int
solisten(struct socket *so, int backlog, struct thread *td)
{
+ int error;
- return ((*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td));
+ CURVNET_SET(so->so_vnet);
+ error = (*so->so_proto->pr_usrreqs->pru_listen)(so, backlog, td);
+ CURVNET_RESTORE();
+ return error;
}
int
@@ -617,11 +638,12 @@ sofree(struct socket *so)
so->so_qstate & SQ_COMP, so->so_qstate & SQ_INCOMP));
if (so->so_options & SO_ACCEPTCONN) {
KASSERT((TAILQ_EMPTY(&so->so_comp)), ("sofree: so_comp populated"));
- KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_comp populated"));
+ KASSERT((TAILQ_EMPTY(&so->so_incomp)), ("sofree: so_incomp populated"));
}
SOCK_UNLOCK(so);
ACCEPT_UNLOCK();
+ VNET_SO_ASSERT(so);
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(so->so_rcv.sb_mb);
if (pr->pr_usrreqs->pru_detach != NULL)
@@ -643,6 +665,8 @@ sofree(struct socket *so)
*/
sbdestroy(&so->so_snd, so);
sbdestroy(&so->so_rcv, so);
+ seldrain(&so->so_snd.sb_sel);
+ seldrain(&so->so_rcv.sb_sel);
knlist_destroy(&so->so_rcv.sb_sel.si_note);
knlist_destroy(&so->so_snd.sb_sel.si_note);
sodealloc(so);
@@ -751,6 +775,7 @@ soabort(struct socket *so)
KASSERT(so->so_state & SS_NOFDREF, ("soabort: !SS_NOFDREF"));
KASSERT((so->so_state & SQ_COMP) == 0, ("soabort: SQ_COMP"));
KASSERT((so->so_state & SQ_INCOMP) == 0, ("soabort: SQ_INCOMP"));
+ VNET_SO_ASSERT(so);
if (so->so_proto->pr_usrreqs->pru_abort != NULL)
(*so->so_proto->pr_usrreqs->pru_abort)(so);
@@ -768,7 +793,10 @@ soaccept(struct socket *so, struct sockaddr **nam)
KASSERT((so->so_state & SS_NOFDREF) != 0, ("soaccept: !NOFDREF"));
so->so_state &= ~SS_NOFDREF;
SOCK_UNLOCK(so);
+
+ CURVNET_SET(so->so_vnet);
error = (*so->so_proto->pr_usrreqs->pru_accept)(so, nam);
+ CURVNET_RESTORE();
return (error);
}
@@ -806,8 +834,12 @@ soconnect(struct socket *so, struct sockaddr *nam, struct thread *td)
int
soconnect2(struct socket *so1, struct socket *so2)
{
+ int error;
- return ((*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2));
+ CURVNET_SET(so1->so_vnet);
+ error = (*so1->so_proto->pr_usrreqs->pru_connect2)(so1, so2);
+ CURVNET_RESTORE();
+ return (error);
}
int
@@ -819,6 +851,7 @@ sodisconnect(struct socket *so)
return (ENOTCONN);
if (so->so_state & SS_ISDISCONNECTING)
return (EALREADY);
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_disconnect)(so);
return (error);
}
@@ -1084,6 +1117,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio,
* there are probably other places that this also happens. We must
* rethink this.
*/
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
(flags & MSG_OOB) ? PRUS_OOB :
/*
@@ -1273,6 +1307,7 @@ restart:
* places that this also happens. We must rethink
* this.
*/
+ VNET_SO_ASSERT(so);
error = (*so->so_proto->pr_usrreqs->pru_send)(so,
(flags & MSG_OOB) ? PRUS_OOB :
/*
@@ -1339,6 +1374,7 @@ soreceive_rcvoob(struct socket *so, struct uio *uio, int flags)
int error;
KASSERT(flags & MSG_OOB, ("soreceive_rcvoob: (flags & MSG_OOB) == 0"));
+ VNET_SO_ASSERT(so);
m = m_get(M_WAIT, MT_DATA);
error = (*pr->pr_usrreqs->pru_rcvoob)(so, m, flags & MSG_PEEK);
@@ -1447,8 +1483,10 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio,
if (mp != NULL)
*mp = NULL;
if ((pr->pr_flags & PR_WANTRCVD) && (so->so_state & SS_ISCONFIRMING)
- && uio->uio_resid)
+ && uio->uio_resid) {
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, 0);
+ }
error = sblock(&so->so_rcv, SBLOCKWAIT(flags));
if (error)
@@ -1461,17 +1499,11 @@ restart:
* If we have less data than requested, block awaiting more (subject
* to any timeout) if:
* 1. the current count is less than the low water mark, or
- * 2. MSG_WAITALL is set, and it is possible to do the entire
- * receive operation at once if we block (resid <= hiwat).
- * 3. MSG_DONTWAIT is not set
- * If MSG_WAITALL is set but resid is larger than the receive buffer,
- * we have to do the receive in sections, and thus risk returning a
- * short count if a timeout or signal occurs after we start.
+ * 2. MSG_DONTWAIT is not set
*/
if (m == NULL || (((flags & MSG_DONTWAIT) == 0 &&
so->so_rcv.sb_cc < uio->uio_resid) &&
- (so->so_rcv.sb_cc < so->so_rcv.sb_lowat ||
- ((flags & MSG_WAITALL) && uio->uio_resid <= so->so_rcv.sb_hiwat)) &&
+ so->so_rcv.sb_cc < so->so_rcv.sb_lowat &&
m->m_nextpkt == NULL && (pr->pr_flags & PR_ATOMIC) == 0)) {
KASSERT(m != NULL || !so->so_rcv.sb_cc,
("receive: m == %p so->so_rcv.sb_cc == %u",
@@ -1597,6 +1629,7 @@ dontblock:
cm->m_next = NULL;
if (pr->pr_domain->dom_externalize != NULL) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
error = (*pr->pr_domain->dom_externalize)
(cm, controlp);
SOCKBUF_LOCK(&so->so_rcv);
@@ -1661,8 +1694,8 @@ dontblock:
* examined ('type'), end the receive operation.
*/
SOCKBUF_LOCK_ASSERT(&so->so_rcv);
- if (m->m_type == MT_OOBDATA) {
- if (type != MT_OOBDATA)
+ if (m->m_type == MT_OOBDATA || m->m_type == MT_CONTROL) {
+ if (type != m->m_type)
break;
} else if (type == MT_OOBDATA)
break;
@@ -1812,15 +1845,22 @@ dontblock:
*/
if (pr->pr_flags & PR_WANTRCVD) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(&so->so_rcv);
}
SBLASTRECORDCHK(&so->so_rcv);
SBLASTMBUFCHK(&so->so_rcv);
- error = sbwait(&so->so_rcv);
- if (error) {
- SOCKBUF_UNLOCK(&so->so_rcv);
- goto release;
+ /*
+	 * We could have received some data while we were notifying
+	 * the protocol.  Skip blocking in this case.
+ */
+ if (so->so_rcv.sb_mb == NULL) {
+ error = sbwait(&so->so_rcv);
+ if (error) {
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ goto release;
+ }
}
m = so->so_rcv.sb_mb;
if (m != NULL)
@@ -1858,6 +1898,7 @@ dontblock:
if (!(flags & MSG_SOCALLBCK) &&
(pr->pr_flags & PR_WANTRCVD)) {
SOCKBUF_UNLOCK(&so->so_rcv);
+ VNET_SO_ASSERT(so);
(*pr->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(&so->so_rcv);
}
@@ -2055,6 +2096,7 @@ deliver:
(((flags & MSG_WAITALL) && uio->uio_resid > 0) ||
!(flags & MSG_SOCALLBCK))) {
SOCKBUF_UNLOCK(sb);
+ VNET_SO_ASSERT(so);
(*so->so_proto->pr_usrreqs->pru_rcvd)(so, flags);
SOCKBUF_LOCK(sb);
}
@@ -2266,9 +2308,13 @@ int
soreceive(struct socket *so, struct sockaddr **psa, struct uio *uio,
struct mbuf **mp0, struct mbuf **controlp, int *flagsp)
{
+ int error;
- return (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
+ CURVNET_SET(so->so_vnet);
+ error = (so->so_proto->pr_usrreqs->pru_soreceive(so, psa, uio, mp0,
controlp, flagsp));
+ CURVNET_RESTORE();
+ return (error);
}
int
@@ -2279,17 +2325,19 @@ soshutdown(struct socket *so, int how)
if (!(how == SHUT_RD || how == SHUT_WR || how == SHUT_RDWR))
return (EINVAL);
+
+ CURVNET_SET(so->so_vnet);
if (pr->pr_usrreqs->pru_flush != NULL) {
(*pr->pr_usrreqs->pru_flush)(so, how);
}
if (how != SHUT_WR)
sorflush(so);
if (how != SHUT_RD) {
- CURVNET_SET(so->so_vnet);
error = (*pr->pr_usrreqs->pru_shutdown)(so);
CURVNET_RESTORE();
return (error);
}
+ CURVNET_RESTORE();
return (0);
}
@@ -2300,6 +2348,8 @@ sorflush(struct socket *so)
struct protosw *pr = so->so_proto;
struct sockbuf asb;
+ VNET_SO_ASSERT(so);
+
/*
* In order to avoid calling dom_dispose with the socket buffer mutex
* held, and in order to generally avoid holding the lock for a long
@@ -2313,7 +2363,6 @@ sorflush(struct socket *so)
* socket buffer. Don't let our acquire be interrupted by a signal
* despite any existing socket disposition on interruptable waiting.
*/
- CURVNET_SET(so->so_vnet);
socantrcvmore(so);
(void) sblock(sb, SBL_WAIT | SBL_NOINTR);
@@ -2337,7 +2386,6 @@ sorflush(struct socket *so)
if (pr->pr_flags & PR_RIGHTS && pr->pr_domain->dom_dispose != NULL)
(*pr->pr_domain->dom_dispose)(asb.sb_mb);
sbrelease_internal(&asb, so);
- CURVNET_RESTORE();
}
/*
@@ -2401,11 +2449,14 @@ sosetopt(struct socket *so, struct sockopt *sopt)
struct mac extmac;
#endif
+ CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto && so->so_proto->pr_ctloutput)
- return ((*so->so_proto->pr_ctloutput)
- (so, sopt));
+ if (so->so_proto->pr_ctloutput != NULL) {
+ error = (*so->so_proto->pr_ctloutput)(so, sopt);
+ CURVNET_RESTORE();
+ return (error);
+ }
error = ENOPROTOOPT;
} else {
switch (sopt->sopt_name) {
@@ -2458,15 +2509,16 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_SETFIB:
error = sooptcopyin(sopt, &optval, sizeof optval,
sizeof optval);
- if (optval < 1 || optval > rt_numfibs) {
+ if (optval < 0 || optval >= rt_numfibs) {
error = EINVAL;
goto bad;
}
- if ((so->so_proto->pr_domain->dom_family == PF_INET) ||
- (so->so_proto->pr_domain->dom_family == PF_ROUTE)) {
+ if (((so->so_proto->pr_domain->dom_family == PF_INET) ||
+ (so->so_proto->pr_domain->dom_family == PF_INET6) ||
+ (so->so_proto->pr_domain->dom_family == PF_ROUTE))) {
so->so_fibnum = optval;
/* Note: ignore error */
- if (so->so_proto && so->so_proto->pr_ctloutput)
+ if (so->so_proto->pr_ctloutput)
(*so->so_proto->pr_ctloutput)(so, sopt);
} else {
so->so_fibnum = 0;
@@ -2584,13 +2636,11 @@ sosetopt(struct socket *so, struct sockopt *sopt)
error = ENOPROTOOPT;
break;
}
- if (error == 0 && so->so_proto != NULL &&
- so->so_proto->pr_ctloutput != NULL) {
- (void) ((*so->so_proto->pr_ctloutput)
- (so, sopt));
- }
+ if (error == 0 && so->so_proto->pr_ctloutput != NULL)
+ (void)(*so->so_proto->pr_ctloutput)(so, sopt);
}
bad:
+ CURVNET_RESTORE();
return (error);
}
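From userland, the option whose range check changes above is set as in the hedged sketch below; valid FIB numbers are now 0 through net.fibs - 1, and PF_INET6 sockets honour it as well.

#include <sys/types.h>
#include <sys/socket.h>

static int
set_socket_fib(int s, int fib)
{
	return (setsockopt(s, SOL_SOCKET, SO_SETFIB, &fib, sizeof(fib)));
}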
@@ -2634,13 +2684,15 @@ sogetopt(struct socket *so, struct sockopt *sopt)
struct mac extmac;
#endif
+ CURVNET_SET(so->so_vnet);
error = 0;
if (sopt->sopt_level != SOL_SOCKET) {
- if (so->so_proto && so->so_proto->pr_ctloutput) {
- return ((*so->so_proto->pr_ctloutput)
- (so, sopt));
- } else
- return (ENOPROTOOPT);
+ if (so->so_proto->pr_ctloutput != NULL)
+ error = (*so->so_proto->pr_ctloutput)(so, sopt);
+ else
+ error = ENOPROTOOPT;
+ CURVNET_RESTORE();
+ return (error);
} else {
switch (sopt->sopt_name) {
#ifdef INET
@@ -2677,6 +2729,10 @@ integer:
optval = so->so_type;
goto integer;
+ case SO_PROTOCOL:
+ optval = so->so_proto->pr_protocol;
+ goto integer;
+
case SO_ERROR:
SOCK_LOCK(so);
optval = so->so_error;
@@ -2724,11 +2780,11 @@ integer:
error = sooptcopyin(sopt, &extmac, sizeof(extmac),
sizeof(extmac));
if (error)
- return (error);
+ goto bad;
error = mac_getsockopt_label(sopt->sopt_td->td_ucred,
so, &extmac);
if (error)
- return (error);
+ goto bad;
error = sooptcopyout(sopt, &extmac, sizeof extmac);
#else
error = EOPNOTSUPP;
@@ -2740,11 +2796,11 @@ integer:
error = sooptcopyin(sopt, &extmac, sizeof(extmac),
sizeof(extmac));
if (error)
- return (error);
+ goto bad;
error = mac_getsockopt_peerlabel(
sopt->sopt_td->td_ucred, so, &extmac);
if (error)
- return (error);
+ goto bad;
error = sooptcopyout(sopt, &extmac, sizeof extmac);
#else
error = EOPNOTSUPP;
@@ -2767,8 +2823,12 @@ integer:
error = ENOPROTOOPT;
break;
}
- return (error);
}
+#ifdef MAC
+bad:
+#endif
+ CURVNET_RESTORE();
+ return (error);
}
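The new SO_PROTOCOL option handled above can be queried from userland roughly as follows (hedged sketch).

#include <sys/types.h>
#include <sys/socket.h>

static int
get_socket_protocol(int s)
{
	int proto;
	socklen_t len = sizeof(proto);

	if (getsockopt(s, SOL_SOCKET, SO_PROTOCOL, &proto, &len) == -1)
		return (-1);
	return (proto);		/* e.g. IPPROTO_TCP for a TCP socket */
}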
/* XXX; prepare mbuf for (__FreeBSD__ < 3) routines. */
@@ -2902,6 +2962,10 @@ sopoll(struct socket *so, int events, struct ucred *active_cred,
struct thread *td)
{
+ /*
+ * We do not need to set or assert curvnet as long as everyone uses
+ * sopoll_generic().
+ */
return (so->so_proto->pr_usrreqs->pru_sopoll(so, events, active_cred,
td));
}
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 5ee11b0c..960aae87 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -368,14 +368,9 @@ rtems_bsd_listen(td, uap)
so = fp->f_data;
#ifdef MAC
error = mac_socket_check_listen(td->td_ucred, so);
- if (error == 0) {
+ if (error == 0)
#endif
- CURVNET_SET(so->so_vnet);
error = solisten(so, uap->backlog, td);
- CURVNET_RESTORE();
-#ifdef MAC
- }
-#endif
fdrop(fp, td);
}
return(error);
@@ -591,9 +586,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name,
tmp = fflag & FASYNC;
(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
sa = 0;
- CURVNET_SET(so->so_vnet);
error = soaccept(so, &sa);
- CURVNET_RESTORE();
if (error) {
/*
* return a namelen of zero for older code which might
@@ -953,6 +946,10 @@ kern_sendit(td, s, mp, flags, control, segflg)
return (error);
so = (struct socket *)fp->f_data;
+#ifdef KTRACE
+ if (mp->msg_name != NULL && KTRPOINT(td, KTR_STRUCT))
+ ktrsockaddr(mp->msg_name);
+#endif
#ifdef MAC
if (mp->msg_name != NULL) {
error = mac_socket_check_connect(td->td_ucred, so,
@@ -1253,11 +1250,9 @@ kern_recvit(td, s, mp, fromseg, controlp)
ktruio = cloneuio(&auio);
#endif
len = auio.uio_resid;
- CURVNET_SET(so->so_vnet);
error = soreceive(so, &fromsa, &auio, (struct mbuf **)0,
(mp->msg_control || controlp) ? &control : (struct mbuf **)0,
&mp->msg_flags);
- CURVNET_RESTORE();
if (error) {
if (auio.uio_resid != (int)len && (error == ERESTART ||
error == EINTR || error == EWOULDBLOCK))
@@ -1720,9 +1715,7 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize)
error = getsock(td->td_proc->p_fd, s, &fp, NULL);
if (error == 0) {
so = fp->f_data;
- CURVNET_SET(so->so_vnet);
error = sosetopt(so, &sopt);
- CURVNET_RESTORE();
fdrop(fp, td);
}
return(error);
@@ -1834,9 +1827,7 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize)
error = getsock(td->td_proc->p_fd, s, &fp, NULL);
if (error == 0) {
so = fp->f_data;
- CURVNET_SET(so->so_vnet);
error = sogetopt(so, &sopt);
- CURVNET_RESTORE();
*valsize = sopt.sopt_valsize;
fdrop(fp, td);
}
@@ -2609,11 +2600,17 @@ retry_space:
}
/*
- * Get a sendfile buf. We usually wait as long
- * as necessary, but this wait can be interrupted.
+ * Get a sendfile buf. When allocating the
+			 * first buffer for the mbuf chain, we usually
+ * wait as long as necessary, but this wait
+			 * can be interrupted.  For subsequent
+ * buffers, do not sleep, since several
+ * threads might exhaust the buffers and then
+ * deadlock.
*/
- if ((sf = sf_buf_alloc(pg,
- (mnw ? SFB_NOWAIT : SFB_CATCH))) == NULL) {
+ sf = sf_buf_alloc(pg, (mnw || m != NULL) ? SFB_NOWAIT :
+ SFB_CATCH);
+ if (sf == NULL) {
mbstat.sf_allocfail++;
vm_page_lock_queues();
vm_page_unwire(pg, 0);
@@ -2623,7 +2620,8 @@ retry_space:
if (pg->wire_count == 0 && pg->object == NULL)
vm_page_free(pg);
vm_page_unlock_queues();
- error = (mnw ? EAGAIN : EINTR);
+ if (m == NULL)
+ error = (mnw ? EAGAIN : EINTR);
break;
}
@@ -2783,9 +2781,13 @@ sctp_peeloff(td, uap)
error = fgetsock(td, uap->sd, &head, &fflag);
if (error)
goto done2;
+ if (head->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto done;
+ }
error = sctp_can_peel_off(head, (sctp_assoc_t)uap->name);
if (error)
- goto done2;
+ goto done;
/*
* At this point we know we do have a assoc to pull
* we proceed to get the fd setup. This may block
@@ -2901,6 +2903,10 @@ sctp_generic_sendmsg (td, uap)
iov[0].iov_len = uap->mlen;
so = (struct socket *)fp->f_data;
+ if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto sctp_bad;
+ }
#ifdef MAC
error = mac_socket_check_send(td->td_ucred, so);
if (error)
@@ -3011,6 +3017,10 @@ sctp_generic_sendmsg_iov(td, uap)
#endif
so = (struct socket *)fp->f_data;
+ if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto sctp_bad;
+ }
#ifdef MAC
error = mac_socket_check_send(td->td_ucred, so);
if (error)
@@ -3115,6 +3125,10 @@ sctp_generic_recvmsg(td, uap)
goto out1;
so = fp->f_data;
+ if (so->so_proto->pr_protocol != IPPROTO_SCTP) {
+ error = EOPNOTSUPP;
+ goto out;
+ }
#ifdef MAC
error = mac_socket_check_receive(td->td_ucred, so);
if (error) {