summaryrefslogtreecommitdiffstats
path: root/freebsd/sys/kern
diff options
context:
space:
mode:
author: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-08-22 14:59:50 +0200
committer: Sebastian Huber <sebastian.huber@embedded-brains.de> 2018-09-21 10:29:41 +0200
commit: 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch)
tree: cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/kern
parent: ck: Define CK_MD_PPC32_LWSYNC if available (diff)
download: rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319. Update #3472.
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r-- freebsd/sys/kern/init_main.c 24
-rw-r--r-- freebsd/sys/kern/kern_event.c 116
-rw-r--r-- freebsd/sys/kern/kern_intr.c 193
-rw-r--r-- freebsd/sys/kern/kern_sysctl.c 102
-rw-r--r-- freebsd/sys/kern/kern_time.c 4
-rw-r--r-- freebsd/sys/kern/subr_blist.c 14
-rw-r--r-- freebsd/sys/kern/subr_bus.c 133
-rw-r--r-- freebsd/sys/kern/subr_counter.c 15
-rw-r--r-- freebsd/sys/kern/subr_gtaskqueue.c 18
-rw-r--r-- freebsd/sys/kern/subr_hints.c 353
-rw-r--r-- freebsd/sys/kern/subr_module.c 36
-rw-r--r-- freebsd/sys/kern/subr_pcpu.c 2
-rw-r--r-- freebsd/sys/kern/subr_prf.c 25
-rwxr-xr-x freebsd/sys/kern/sys_pipe.c 99
-rw-r--r-- freebsd/sys/kern/uipc_sockbuf.c 79
-rw-r--r-- freebsd/sys/kern/uipc_socket.c 28
-rw-r--r-- freebsd/sys/kern/uipc_syscalls.c 97
-rw-r--r-- freebsd/sys/kern/uipc_usrreq.c 157
18 files changed, 952 insertions, 543 deletions
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c
index 42afff5e..fa4951d8 100644
--- a/freebsd/sys/kern/init_main.c
+++ b/freebsd/sys/kern/init_main.c
@@ -122,6 +122,18 @@ int bootverbose = BOOTVERBOSE;
SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0,
"Control the output of verbose kernel messages");
+#ifdef VERBOSE_SYSINIT
+/*
+ * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to
+ * dictate the default VERBOSE_SYSINIT behavior. Significant values for this
+ * option and associated tunable are:
+ * - 0, 'compiled in but silent by default'
+ * - 1, 'compiled in but verbose by default' (default)
+ */
+int verbose_sysinit = VERBOSE_SYSINIT;
+TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit);
+#endif
+
#ifdef INVARIANTS
FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance");
#endif
@@ -287,7 +299,7 @@ restart:
continue;
#if defined(VERBOSE_SYSINIT)
- if ((*sipp)->subsystem > last) {
+ if ((*sipp)->subsystem > last && verbose_sysinit != 0) {
verbose = 1;
last = (*sipp)->subsystem;
printf("subsystem %x\n", last);
@@ -526,6 +538,7 @@ proc0_init(void *dummy __unused)
p->p_peers = 0;
p->p_leader = p;
p->p_reaper = p;
+ p->p_treeflag |= P_TREE_REAPER;
LIST_INIT(&p->p_reaplist);
strncpy(p->p_comm, "kernel", sizeof (p->p_comm));
@@ -642,17 +655,23 @@ proc0_post(void *dummy __unused)
*/
sx_slock(&allproc_lock);
FOREACH_PROC_IN_SYSTEM(p) {
+ PROC_LOCK(p);
+ if (p->p_state == PRS_NEW) {
+ PROC_UNLOCK(p);
+ continue;
+ }
microuptime(&p->p_stats->p_start);
PROC_STATLOCK(p);
rufetch(p, &ru); /* Clears thread stats */
- PROC_STATUNLOCK(p);
p->p_rux.rux_runtime = 0;
p->p_rux.rux_uticks = 0;
p->p_rux.rux_sticks = 0;
p->p_rux.rux_iticks = 0;
+ PROC_STATUNLOCK(p);
FOREACH_THREAD_IN_PROC(p, td) {
td->td_runtime = 0;
}
+ PROC_UNLOCK(p);
}
sx_sunlock(&allproc_lock);
PCPU_SET(switchtime, cpu_ticks());
@@ -857,7 +876,6 @@ create_init(const void *udata __unused)
PROC_LOCK(initproc);
initproc->p_flag |= P_SYSTEM | P_INMEM;
initproc->p_treeflag |= P_TREE_REAPER;
- LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling);
oldcred = initproc->p_ucred;
crcopy(newcred, oldcred);
#ifdef MAC
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c
index 33fca549..25a9518f 100644
--- a/freebsd/sys/kern/kern_event.c
+++ b/freebsd/sys/kern/kern_event.c
@@ -179,6 +179,10 @@ static int filt_fileattach(struct knote *kn);
static void filt_timerexpire(void *knx);
static int filt_timerattach(struct knote *kn);
static void filt_timerdetach(struct knote *kn);
+static void filt_timerstart(struct knote *kn, sbintime_t to);
+static void filt_timertouch(struct knote *kn, struct kevent *kev,
+ u_long type);
+static int filt_timervalidate(struct knote *kn, sbintime_t *to);
static int filt_timer(struct knote *kn, long hint);
static int filt_userattach(struct knote *kn);
static void filt_userdetach(struct knote *kn);
@@ -209,6 +213,7 @@ static struct filterops timer_filtops = {
.f_attach = filt_timerattach,
.f_detach = filt_timerdetach,
.f_event = filt_timer,
+ .f_touch = filt_timertouch,
};
static struct filterops user_filtops = {
.f_attach = filt_userattach,
@@ -738,29 +743,44 @@ filt_timerexpire(void *knx)
* data contains amount of time to sleep
*/
static int
-filt_timerattach(struct knote *kn)
+filt_timervalidate(struct knote *kn, sbintime_t *to)
{
- struct kq_timer_cb_data *kc;
struct bintime bt;
- sbintime_t to, sbt;
- unsigned int ncallouts;
+ sbintime_t sbt;
if (kn->kn_sdata < 0)
return (EINVAL);
if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0)
kn->kn_sdata = 1;
- /* Only precision unit are supported in flags so far */
+ /*
+ * The only fflags values supported are the timer unit
+ * (precision) and the absolute time indicator.
+ */
if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0)
return (EINVAL);
- to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
+ *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags);
if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
getboottimebin(&bt);
sbt = bttosbt(bt);
- to -= sbt;
+ *to -= sbt;
}
- if (to < 0)
+ if (*to < 0)
return (EINVAL);
+ return (0);
+}
+
+static int
+filt_timerattach(struct knote *kn)
+{
+ struct kq_timer_cb_data *kc;
+ sbintime_t to;
+ unsigned int ncallouts;
+ int error;
+
+ error = filt_timervalidate(kn, &to);
+ if (error != 0)
+ return (error);
do {
ncallouts = kq_ncallouts;
@@ -773,6 +793,17 @@ filt_timerattach(struct knote *kn)
kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */
kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK);
callout_init(&kc->c, 1);
+ filt_timerstart(kn, to);
+
+ return (0);
+}
+
+static void
+filt_timerstart(struct knote *kn, sbintime_t to)
+{
+ struct kq_timer_cb_data *kc;
+
+ kc = kn->kn_ptr.p_v;
if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) {
kc->next = to;
kc->to = 0;
@@ -782,8 +813,6 @@ filt_timerattach(struct knote *kn)
}
callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn,
PCPU_GET(cpuid), C_ABSOLUTE);
-
- return (0);
}
static void
@@ -800,6 +829,73 @@ filt_timerdetach(struct knote *kn)
kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */
}
+static void
+filt_timertouch(struct knote *kn, struct kevent *kev, u_long type)
+{
+ struct kq_timer_cb_data *kc;
+ struct kqueue *kq;
+ sbintime_t to;
+ int error;
+
+ switch (type) {
+ case EVENT_REGISTER:
+ /* Handle re-added timers that update data/fflags */
+ if (kev->flags & EV_ADD) {
+ kc = kn->kn_ptr.p_v;
+
+ /* Drain any existing callout. */
+ callout_drain(&kc->c);
+
+ /* Throw away any existing undelivered record
+ * of the timer expiration. This is done under
+ * the presumption that if a process is
+ * re-adding this timer with new parameters,
+ * it is no longer interested in what may have
+ * happened under the old parameters. If it is
+ * interested, it can wait for the expiration,
+ * delete the old timer definition, and then
+ * add the new one.
+ *
+ * This has to be done while the kq is locked:
+ * - if enqueued, dequeue
+ * - make it no longer active
+ * - clear the count of expiration events
+ */
+ kq = kn->kn_kq;
+ KQ_LOCK(kq);
+ if (kn->kn_status & KN_QUEUED)
+ knote_dequeue(kn);
+
+ kn->kn_status &= ~KN_ACTIVE;
+ kn->kn_data = 0;
+ KQ_UNLOCK(kq);
+
+ /* Reschedule timer based on new data/fflags */
+ kn->kn_sfflags = kev->fflags;
+ kn->kn_sdata = kev->data;
+ error = filt_timervalidate(kn, &to);
+ if (error != 0) {
+ kn->kn_flags |= EV_ERROR;
+ kn->kn_data = error;
+ } else
+ filt_timerstart(kn, to);
+ }
+ break;
+
+ case EVENT_PROCESS:
+ *kev = kn->kn_kevent;
+ if (kn->kn_flags & EV_CLEAR) {
+ kn->kn_data = 0;
+ kn->kn_fflags = 0;
+ }
+ break;
+
+ default:
+ panic("filt_timertouch() - invalid type (%ld)", type);
+ break;
+ }
+}
+
static int
filt_timer(struct knote *kn, long hint)
{
diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c
index 8f6c2a6d..04914e93 100644
--- a/freebsd/sys/kern/kern_intr.c
+++ b/freebsd/sys/kern/kern_intr.c
@@ -175,12 +175,13 @@ ithread_update(struct intr_thread *ithd)
ie = ithd->it_event;
td = ithd->it_thread;
+ mtx_assert(&ie->ie_lock, MA_OWNED);
/* Determine the overall priority of this event. */
- if (TAILQ_EMPTY(&ie->ie_handlers))
+ if (CK_SLIST_EMPTY(&ie->ie_handlers))
pri = PRI_MAX_ITHD;
else
- pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri;
+ pri = CK_SLIST_FIRST(&ie->ie_handlers)->ih_pri;
/* Update name and priority. */
#ifndef __rtems__
@@ -218,7 +219,7 @@ intr_event_update(struct intr_event *ie)
space = 1;
/* Run through all the handlers updating values. */
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 <
sizeof(ie->ie_fullname)) {
strcat(ie->ie_fullname, " ");
@@ -280,7 +281,7 @@ intr_event_create(struct intr_event **event, void *source, int flags, int irq,
ie->ie_flags = flags;
ie->ie_irq = irq;
ie->ie_cpu = NOCPU;
- TAILQ_INIT(&ie->ie_handlers);
+ CK_SLIST_INIT(&ie->ie_handlers);
mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF);
va_start(ap, fmt);
@@ -402,7 +403,7 @@ intr_lookup(int irq)
TAILQ_FOREACH(ie, &event_list, ie_list)
if (ie->ie_irq == irq &&
(ie->ie_flags & IE_SOFT) == 0 &&
- TAILQ_FIRST(&ie->ie_handlers) != NULL)
+ CK_SLIST_FIRST(&ie->ie_handlers) != NULL)
break;
mtx_unlock(&event_lock);
return (ie);
@@ -498,7 +499,7 @@ intr_event_destroy(struct intr_event *ie)
mtx_lock(&event_lock);
mtx_lock(&ie->ie_lock);
- if (!TAILQ_EMPTY(&ie->ie_handlers)) {
+ if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
mtx_unlock(&ie->ie_lock);
mtx_unlock(&event_lock);
return (EBUSY);
@@ -532,7 +533,7 @@ ithread_create(const char *name)
error = kproc_kthread_add(ithread_loop, ithd, &intrproc,
&td, RFSTOPPED | RFHIGHPID,
- 0, "intr", "%s", name);
+ 0, "intr", "%s", name);
if (error)
panic("kproc_create() failed with %d", error);
thread_lock(td);
@@ -573,6 +574,7 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
enum intr_type flags, void **cookiep)
{
struct intr_handler *ih, *temp_ih;
+ struct intr_handler **prevptr;
struct intr_thread *it;
if (ie == NULL || name == NULL || (handler == NULL && filter == NULL))
@@ -595,9 +597,9 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
/* We can only have one exclusive handler in a event. */
mtx_lock(&ie->ie_lock);
- if (!TAILQ_EMPTY(&ie->ie_handlers)) {
+ if (!CK_SLIST_EMPTY(&ie->ie_handlers)) {
if ((flags & INTR_EXCL) ||
- (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
+ (CK_SLIST_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) {
mtx_unlock(&ie->ie_lock);
free(ih, M_ITHREAD);
return (EINVAL);
@@ -622,14 +624,12 @@ intr_event_add_handler(struct intr_event *ie, const char *name,
}
/* Add the new handler to the event in priority order. */
- TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH_PREVPTR(temp_ih, prevptr, &ie->ie_handlers, ih_next) {
if (temp_ih->ih_pri > ih->ih_pri)
break;
}
- if (temp_ih == NULL)
- TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next);
- else
- TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next);
+ CK_SLIST_INSERT_PREVPTR(prevptr, temp_ih, ih, ih_next);
+
intr_event_update(ie);
CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name,
@@ -656,7 +656,7 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie,
mtx_lock(&ie->ie_lock);
#ifdef INVARIANTS
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (ih == cookie)
break;
}
@@ -718,6 +718,45 @@ intr_handler_source(void *cookie)
}
/*
+ * If intr_event_handle() is running in the ISR context at the time of the call,
+ * then wait for it to complete.
+ */
+static void
+intr_event_barrier(struct intr_event *ie)
+{
+ int phase;
+
+ mtx_assert(&ie->ie_lock, MA_OWNED);
+ phase = ie->ie_phase;
+
+ /*
+ * Switch phase to direct future interrupts to the other active counter.
+ * Make sure that any preceding stores are visible before the switch.
+ */
+ KASSERT(ie->ie_active[!phase] == 0, ("idle phase has activity"));
+ atomic_store_rel_int(&ie->ie_phase, !phase);
+
+ /*
+ * This code cooperates with wait-free iteration of ie_handlers
+ * in intr_event_handle.
+ * Make sure that the removal and the phase update are not reordered
+ * with the active count check.
+ * Note that no combination of acquire and release fences can provide
+ * that guarantee as Store->Load sequences can always be reordered.
+ */
+ atomic_thread_fence_seq_cst();
+
+ /*
+ * Now wait on the inactive phase.
+ * The acquire fence is needed so that all post-barrier accesses
+ * are after the check.
+ */
+ while (ie->ie_active[phase] > 0)
+ cpu_spinwait();
+ atomic_thread_fence_acq();
+}
+
+/*
* Sleep until an ithread finishes executing an interrupt handler.
*
* XXX Doesn't currently handle interrupt filters or fast interrupt
@@ -757,16 +796,14 @@ _intr_drain(int irq)
}
#endif /* __rtems__ */
-
#ifndef __rtems__
int
intr_event_remove_handler(void *cookie)
{
struct intr_handler *handler = (struct intr_handler *)cookie;
struct intr_event *ie;
-#ifdef INVARIANTS
struct intr_handler *ih;
-#endif
+ struct intr_handler **prevptr;
#ifdef notyet
int dead;
#endif
@@ -777,60 +814,48 @@ intr_event_remove_handler(void *cookie)
KASSERT(ie != NULL,
("interrupt handler \"%s\" has a NULL interrupt event",
handler->ih_name));
+
mtx_lock(&ie->ie_lock);
CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name,
ie->ie_name);
-#ifdef INVARIANTS
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
+ CK_SLIST_FOREACH_PREVPTR(ih, prevptr, &ie->ie_handlers, ih_next) {
if (ih == handler)
- goto ok;
- mtx_unlock(&ie->ie_lock);
- panic("interrupt handler \"%s\" not found in interrupt event \"%s\"",
- ih->ih_name, ie->ie_name);
-ok:
-#endif
+ break;
+ }
+ if (ih == NULL) {
+ panic("interrupt handler \"%s\" not found in "
+ "interrupt event \"%s\"", handler->ih_name, ie->ie_name);
+ }
+
/*
- * If there is no ithread, then just remove the handler and return.
- * XXX: Note that an INTR_FAST handler might be running on another
- * CPU!
+ * If there is no ithread, then directly remove the handler. Note that
+ * intr_event_handle() iterates ie_handlers in a lock-less fashion, so
+ * care needs to be taken to keep ie_handlers consistent and to free
+ * the removed handler only when ie_handlers is quiescent.
*/
if (ie->ie_thread == NULL) {
- TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
+ CK_SLIST_REMOVE_PREVPTR(prevptr, ih, ih_next);
+ intr_event_barrier(ie);
+ intr_event_update(ie);
mtx_unlock(&ie->ie_lock);
free(handler, M_ITHREAD);
return (0);
}
/*
- * If the interrupt thread is already running, then just mark this
- * handler as being dead and let the ithread do the actual removal.
- *
- * During a cold boot while cold is set, msleep() does not sleep,
- * so we have to remove the handler here rather than letting the
- * thread do it.
+ * Let the interrupt thread do the job.
+ * The interrupt source is disabled when the interrupt thread is
+ * running, so it does not have to worry about interaction with
+ * intr_event_handle().
*/
- thread_lock(ie->ie_thread->it_thread);
- if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) {
- handler->ih_flags |= IH_DEAD;
-
- /*
- * Ensure that the thread will process the handler list
- * again and remove this handler if it has already passed
- * it on the list.
- *
- * The release part of the following store ensures
- * that the update of ih_flags is ordered before the
- * it_need setting. See the comment before
- * atomic_cmpset_acq(&ithd->it_need, ...) operation in
- * the ithread_execute_handlers().
- */
- atomic_store_rel_int(&ie->ie_thread->it_need, 1);
- } else
- TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next);
- thread_unlock(ie->ie_thread->it_thread);
+ KASSERT((handler->ih_flags & IH_DEAD) == 0,
+ ("duplicate handle remove"));
+ handler->ih_flags |= IH_DEAD;
+ intr_event_schedule_thread(ie);
while (handler->ih_flags & IH_DEAD)
msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0);
intr_event_update(ie);
+
#ifdef notyet
/*
* XXX: This could be bad in the case of ppbus(8). Also, I think
@@ -838,8 +863,8 @@ ok:
* interrupt.
*/
dead = 1;
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
- if (!(ih->ih_flags & IH_FAST)) {
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
+ if (ih->ih_handler != NULL) {
dead = 0;
break;
}
@@ -866,7 +891,7 @@ intr_event_schedule_thread(struct intr_event *ie)
/*
* If no ithread or no handlers, then we have a stray interrupt.
*/
- if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) ||
+ if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers) ||
ie->ie_thread == NULL)
return (EINVAL);
@@ -881,7 +906,7 @@ intr_event_schedule_thread(struct intr_event *ie)
if (ie->ie_flags & IE_ENTROPY) {
entropy.event = (uintptr_t)ie;
entropy.td = ctd;
- random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT);
+ random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT);
}
#ifndef __rtems__
@@ -981,7 +1006,7 @@ swi_sched(void *cookie, int flags)
entropy.event = (uintptr_t)ih;
entropy.td = curthread;
- random_harvest_queue(&entropy, sizeof(entropy), 1, RANDOM_SWI);
+ random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI);
/*
* Set ih_need for this handler so that if the ithread is already
@@ -1012,32 +1037,37 @@ swi_remove(void *cookie)
return (intr_event_remove_handler(cookie));
}
-
-
#endif /* __rtems__ */
-/*
- * This is a public function for use by drivers that mux interrupt
- * handlers for child devices from their interrupt handler.
- */
-void
+
+static void
intr_event_execute_handlers(struct proc *p, struct intr_event *ie)
{
- struct intr_handler *ih, *ihn;
+ struct intr_handler *ih, *ihn, *ihp;
- TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
+ ihp = NULL;
+ CK_SLIST_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) {
/*
* If this handler is marked for death, remove it from
* the list of handlers and wake up the sleeper.
*/
if (ih->ih_flags & IH_DEAD) {
mtx_lock(&ie->ie_lock);
- TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next);
+ if (ihp == NULL)
+ CK_SLIST_REMOVE_HEAD(&ie->ie_handlers, ih_next);
+ else
+ CK_SLIST_REMOVE_AFTER(ihp, ih_next);
ih->ih_flags &= ~IH_DEAD;
wakeup(ih);
mtx_unlock(&ie->ie_lock);
continue;
}
+ /*
+ * Now that we know that the current element won't be removed
+ * update the previous element.
+ */
+ ihp = ih;
+
/* Skip filter only handlers */
if (ih->ih_handler == NULL)
continue;
@@ -1226,6 +1256,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
struct trapframe *oldframe;
struct thread *td;
int ret, thread;
+ int phase;
td = curthread;
@@ -1234,7 +1265,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
#endif
/* An interrupt with no event or handlers is a stray interrupt. */
- if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers))
+ if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers))
return (EINVAL);
/*
@@ -1249,7 +1280,17 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
critical_enter();
oldframe = td->td_intr_frame;
td->td_intr_frame = frame;
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) {
+
+ phase = ie->ie_phase;
+ atomic_add_int(&ie->ie_active[phase], 1);
+
+ /*
+ * This fence is required to ensure that no later loads are
+ * re-ordered before the ie_active store.
+ */
+ atomic_thread_fence_seq_cst();
+
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) {
if (ih->ih_filter == NULL) {
thread = 1;
continue;
@@ -1286,6 +1327,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
thread = 1;
}
}
+ atomic_add_rel_int(&ie->ie_active[phase], -1);
+
td->td_intr_frame = oldframe;
if (thread) {
@@ -1295,7 +1338,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame)
if (ie->ie_post_filter != NULL)
ie->ie_post_filter(ie->ie_source);
}
-
+
/* Schedule the ithread if needed. */
if (thread) {
int error __unused;
@@ -1441,7 +1484,7 @@ db_dump_intr_event(struct intr_event *ie, int handlers)
db_printf("\n");
if (handlers)
- TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next)
+ CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next)
db_dump_intrhand(ih);
}
@@ -1456,7 +1499,7 @@ DB_SHOW_COMMAND(intr, db_show_intr)
verbose = strchr(modif, 'v') != NULL;
all = strchr(modif, 'a') != NULL;
TAILQ_FOREACH(ie, &event_list, ie_list) {
- if (!all && TAILQ_EMPTY(&ie->ie_handlers))
+ if (!all && CK_SLIST_EMPTY(&ie->ie_handlers))
continue;
db_dump_intr_event(ie, verbose);
if (db_pager_quit)
diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c
index b4e9711f..3baea2e4 100644
--- a/freebsd/sys/kern/kern_sysctl.c
+++ b/freebsd/sys/kern/kern_sysctl.c
@@ -195,13 +195,8 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
char path[96];
ssize_t rem = sizeof(path);
ssize_t len;
- uint8_t val_8;
- uint16_t val_16;
- uint32_t val_32;
- int val_int;
- long val_long;
- int64_t val_64;
- quad_t val_quad;
+ uint8_t data[512] __aligned(sizeof(uint64_t));
+ int size;
int error;
path[--rem] = 0;
@@ -229,85 +224,88 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp)
switch (oidp->oid_kind & CTLTYPE) {
case CTLTYPE_INT:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int), GETENV_SIGNED) == 0)
return;
- req.newlen = sizeof(val_int);
- req.newptr = &val_int;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_UINT:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int), GETENV_UNSIGNED) == 0)
return;
- req.newlen = sizeof(val_int);
- req.newptr = &val_int;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_LONG:
- if (getenv_long(path + rem, &val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(long), GETENV_SIGNED) == 0)
return;
- req.newlen = sizeof(val_long);
- req.newptr = &val_long;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_ULONG:
- if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(long), GETENV_UNSIGNED) == 0)
return;
- req.newlen = sizeof(val_long);
- req.newptr = &val_long;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S8:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int8_t), GETENV_SIGNED) == 0)
return;
- val_8 = val_int;
- req.newlen = sizeof(val_8);
- req.newptr = &val_8;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S16:
- if (getenv_int(path + rem, &val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int16_t), GETENV_SIGNED) == 0)
return;
- val_16 = val_int;
- req.newlen = sizeof(val_16);
- req.newptr = &val_16;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S32:
- if (getenv_long(path + rem, &val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int32_t), GETENV_SIGNED) == 0)
return;
- val_32 = val_long;
- req.newlen = sizeof(val_32);
- req.newptr = &val_32;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_S64:
- if (getenv_quad(path + rem, &val_quad) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(int64_t), GETENV_SIGNED) == 0)
return;
- val_64 = val_quad;
- req.newlen = sizeof(val_64);
- req.newptr = &val_64;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U8:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint8_t), GETENV_UNSIGNED) == 0)
return;
- val_8 = val_int;
- req.newlen = sizeof(val_8);
- req.newptr = &val_8;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U16:
- if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint16_t), GETENV_UNSIGNED) == 0)
return;
- val_16 = val_int;
- req.newlen = sizeof(val_16);
- req.newptr = &val_16;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U32:
- if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint32_t), GETENV_UNSIGNED) == 0)
return;
- val_32 = val_long;
- req.newlen = sizeof(val_32);
- req.newptr = &val_32;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_U64:
- /* XXX there is no getenv_uquad() */
- if (getenv_quad(path + rem, &val_quad) == 0)
+ if (getenv_array(path + rem, data, sizeof(data), &size,
+ sizeof(uint64_t), GETENV_UNSIGNED) == 0)
return;
- val_64 = val_quad;
- req.newlen = sizeof(val_64);
- req.newptr = &val_64;
+ req.newlen = size;
+ req.newptr = data;
break;
case CTLTYPE_STRING:
penv = kern_getenv(path + rem);
diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c
index a9c0547a..74b144cb 100644
--- a/freebsd/sys/kern/kern_time.c
+++ b/freebsd/sys/kern/kern_time.c
@@ -288,6 +288,8 @@ get_process_cputime(struct proc *targetp, struct timespec *ats)
PROC_STATLOCK(targetp);
rufetch(targetp, &ru);
runtime = targetp->p_rux.rux_runtime;
+ if (curthread->td_proc == targetp)
+ runtime += cpu_ticks() - PCPU_GET(switchtime);
PROC_STATUNLOCK(targetp);
cputick2timespec(runtime, ats);
}
@@ -1577,7 +1579,7 @@ realtimer_settime(struct itimer *it, int flags,
if ((flags & TIMER_ABSTIME) == 0) {
/* Convert to absolute time. */
timespecadd(&it->it_time.it_value, &cts,
- &it->it_time.it_value);
+ &it->it_time.it_value);
} else {
timespecsub(&ts, &cts, &ts);
/*
diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c
index a7d78d86..e5b40e62 100644
--- a/freebsd/sys/kern/subr_blist.c
+++ b/freebsd/sys/kern/subr_blist.c
@@ -226,17 +226,19 @@ blist_create(daddr_t blocks, int flags)
u_daddr_t nodes, radix, skip;
int digit;
+ if (blocks == 0)
+ panic("invalid block count");
+
/*
- * Calculate the radix and node count used for scanning. Find the last
- * block that is followed by a terminator.
+ * Calculate the radix and node count used for scanning.
*/
last_block = blocks - 1;
radix = BLIST_BMAP_RADIX;
while (radix < blocks) {
if (((last_block / radix + 1) & BLIST_META_MASK) != 0)
/*
- * A terminator will be added. Update last_block to the
- * position just before that terminator.
+ * We must widen the blist to avoid partially
+ * filled nodes.
*/
last_block |= radix - 1;
radix *= BLIST_META_RADIX;
@@ -246,7 +248,9 @@ blist_create(daddr_t blocks, int flags)
* Count the meta-nodes in the expanded tree, including the final
* terminator, from the bottom level up to the root.
*/
- nodes = (last_block >= blocks) ? 2 : 1;
+ nodes = 1;
+ if (radix - blocks >= BLIST_BMAP_RADIX)
+ nodes++;
last_block /= BLIST_BMAP_RADIX;
while (last_block > 0) {
nodes += last_block + 1;
diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c
index 0626ec0a..391b2ed6 100644
--- a/freebsd/sys/kern/subr_bus.c
+++ b/freebsd/sys/kern/subr_bus.c
@@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$");
#include <machine/bus.h>
#include <sys/random.h>
#include <sys/rman.h>
+#include <sys/sbuf.h>
#include <sys/selinfo.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
@@ -60,7 +61,6 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/uio.h>
#include <sys/bus.h>
-#include <sys/interrupt.h>
#include <sys/cpuset.h>
#include <net/vnet.h>
@@ -84,6 +84,8 @@ struct driverlink {
kobj_class_t driver;
TAILQ_ENTRY(driverlink) link; /* list of drivers in devclass */
int pass;
+ int flags;
+#define DL_DEFERRED_PROBE 1 /* Probe deferred on this */
TAILQ_ENTRY(driverlink) passlink;
};
@@ -155,6 +157,9 @@ EVENTHANDLER_LIST_DEFINE(device_detach);
EVENTHANDLER_LIST_DEFINE(dev_lookup);
static void devctl2_init(void);
+static bool device_frozen;
+#else /* __rtems__ */
+#define device_frozen false
#endif /* __rtems__ */
#define DRIVERNAME(d) ((d)? d->name : "no driver")
@@ -885,27 +890,18 @@ sysctl_devctl_queue(SYSCTL_HANDLER_ARGS)
* Strings are always terminated with a NUL, but may be truncated if longer
* than @p len bytes after quotes.
*
- * @param dst Buffer to hold the string. Must be at least @p len bytes long
+ * @param sb sbuf to place the characters into
* @param src Original buffer.
- * @param len Length of buffer pointed to by @dst, including trailing NUL
*/
void
-devctl_safe_quote(char *dst, const char *src, size_t len)
+devctl_safe_quote_sb(struct sbuf *sb, const char *src)
{
- char *walker = dst, *ep = dst + len - 1;
- if (len == 0)
- return;
- while (src != NULL && walker < ep)
- {
- if (*src == '"' || *src == '\\') {
- if (ep - walker < 2)
- break;
- *walker++ = '\\';
- }
- *walker++ = *src++;
+ while (*src != '\0') {
+ if (*src == '"' || *src == '\\')
+ sbuf_putc(sb, '\\');
+ sbuf_putc(sb, *src++);
}
- *walker = '\0';
}
/* End of /dev/devctl code */
@@ -1204,7 +1200,11 @@ devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp)
dl->pass = pass;
driver_register_pass(dl);
- devclass_driver_added(dc, driver);
+ if (device_frozen) {
+ dl->flags |= DL_DEFERRED_PROBE;
+ } else {
+ devclass_driver_added(dc, driver);
+ }
bus_data_generation_update();
return (0);
}
@@ -1244,6 +1244,9 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
* Note that since a driver can be in multiple devclasses, we
* should not detach devices which are not children of devices in
* the affected devclass.
+ *
+ * If we're frozen, we don't generate NOMATCH events. Mark to
+ * generate later.
*/
for (i = 0; i < dc->maxunit; i++) {
if (dc->devices[i]) {
@@ -1252,9 +1255,14 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver)
dev->parent->devclass == busclass) {
if ((error = device_detach(dev)) != 0)
return (error);
- BUS_PROBE_NOMATCH(dev->parent, dev);
- devnomatch(dev);
- dev->flags |= DF_DONENOMATCH;
+ if (device_frozen) {
+ dev->flags &= ~DF_DONENOMATCH;
+ dev->flags |= DF_NEEDNOMATCH;
+ } else {
+ BUS_PROBE_NOMATCH(dev->parent, dev);
+ devnomatch(dev);
+ dev->flags |= DF_DONENOMATCH;
+ }
}
}
}
@@ -2958,6 +2966,7 @@ int
device_attach(device_t dev)
{
uint64_t attachtime;
+ uint16_t attachentropy;
int error;
#ifndef __rtems__
@@ -2985,19 +2994,12 @@ device_attach(device_t dev)
dev->state = DS_NOTPRESENT;
return (error);
}
- attachtime = get_cyclecount() - attachtime;
- /*
- * 4 bits per device is a reasonable value for desktop and server
- * hardware with good get_cyclecount() implementations, but WILL
- * need to be adjusted on other platforms.
+ dev->flags |= DF_ATTACHED_ONCE;
+ /* We only need the low bits of this time, but ranges from tens to thousands
+ * have been seen, so keep 2 bytes' worth.
*/
-#define RANDOM_PROBE_BIT_GUESS 4
- if (bootverbose)
- printf("random: harvesting attach, %zu bytes (%d bits) from %s%d\n",
- sizeof(attachtime), RANDOM_PROBE_BIT_GUESS,
- dev->driver->name, dev->unit);
- random_harvest_direct(&attachtime, sizeof(attachtime),
- RANDOM_PROBE_BIT_GUESS, RANDOM_ATTACH);
+ attachentropy = (uint16_t)(get_cyclecount() - attachtime);
+ random_harvest_direct(&attachentropy, sizeof(attachentropy), RANDOM_ATTACH);
device_sysctl_update(dev);
if (dev->busy)
dev->state = DS_BUSY;
@@ -5474,6 +5476,53 @@ driver_exists(device_t bus, const char *driver)
return (false);
}
+static void
+device_gen_nomatch(device_t dev)
+{
+ device_t child;
+
+ if (dev->flags & DF_NEEDNOMATCH &&
+ dev->state == DS_NOTPRESENT) {
+ BUS_PROBE_NOMATCH(dev->parent, dev);
+ devnomatch(dev);
+ dev->flags |= DF_DONENOMATCH;
+ }
+ dev->flags &= ~DF_NEEDNOMATCH;
+ TAILQ_FOREACH(child, &dev->children, link) {
+ device_gen_nomatch(child);
+ }
+}
+
+static void
+device_do_deferred_actions(void)
+{
+ devclass_t dc;
+ driverlink_t dl;
+
+ /*
+ * Walk through the devclasses to find all the drivers we've tagged as
+ * deferred during the freeze and call the driver added routines. They
+ * have already been added to the lists in the background, so the driver
+ * added routines that trigger a probe will have all the right bidders
+ * for the probe auction.
+ */
+ TAILQ_FOREACH(dc, &devclasses, link) {
+ TAILQ_FOREACH(dl, &dc->drivers, link) {
+ if (dl->flags & DL_DEFERRED_PROBE) {
+ devclass_driver_added(dc, dl->driver);
+ dl->flags &= ~DL_DEFERRED_PROBE;
+ }
+ }
+ }
+
+ /*
+ * We also defer no-match events during a freeze. Walk the tree and
+ * generate all the pent-up events that are still relevant.
+ */
+ device_gen_nomatch(root_bus);
+ bus_data_generation_update();
+}
+
static int
devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
struct thread *td)
@@ -5500,6 +5549,10 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
if (error == 0)
error = find_device(req, &dev);
break;
+ case DEV_FREEZE:
+ case DEV_THAW:
+ error = priv_check(td, PRIV_DRIVER);
+ break;
default:
error = ENOTTY;
break;
@@ -5703,7 +5756,23 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag,
error = device_delete_child(parent, dev);
break;
}
+#ifndef __rtems__
+ case DEV_FREEZE:
+ if (device_frozen)
+ error = EBUSY;
+ else
+ device_frozen = true;
+ break;
+ case DEV_THAW:
+ if (!device_frozen)
+ error = EBUSY;
+ else {
+ device_do_deferred_actions();
+ device_frozen = false;
+ }
+ break;
}
+#endif /* __rtems__ */
mtx_unlock(&Giant);
return (error);
}
diff --git a/freebsd/sys/kern/subr_counter.c b/freebsd/sys/kern/subr_counter.c
index e4c98fae..66cda02b 100644
--- a/freebsd/sys/kern/subr_counter.c
+++ b/freebsd/sys/kern/subr_counter.c
@@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$");
#define IN_SUBR_COUNTER_C
#include <sys/counter.h>
-
+
void
counter_u64_zero(counter_u64_t c)
{
@@ -62,20 +62,15 @@ counter_u64_fetch(counter_u64_t c)
counter_u64_t
counter_u64_alloc(int flags)
{
- counter_u64_t r;
-
- r = uma_zalloc(pcpu_zone_64, flags);
- if (r != NULL)
- counter_u64_zero(r);
- return (r);
+ return (uma_zalloc_pcpu(pcpu_zone_64, flags | M_ZERO));
}
void
counter_u64_free(counter_u64_t c)
{
- uma_zfree(pcpu_zone_64, c);
+ uma_zfree_pcpu(pcpu_zone_64, c);
}
int
@@ -142,7 +137,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit)
val = cr->cr_over;
now = ticks;
- if (now - cr->cr_ticks >= hz) {
+ if ((u_int)(now - cr->cr_ticks) >= hz) {
/*
* Time to clear the structure, we are in the next second.
* First try unlocked read, and then proceed with atomic.
@@ -153,7 +148,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit)
* Check if other thread has just went through the
* reset sequence before us.
*/
- if (now - cr->cr_ticks >= hz) {
+ if ((u_int)(now - cr->cr_ticks) >= hz) {
val = counter_u64_fetch(cr->cr_rate);
counter_u64_zero(cr->cr_rate);
cr->cr_over = 0;
diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c
index aa5c922d..e56e90d7 100644
--- a/freebsd/sys/kern/subr_gtaskqueue.c
+++ b/freebsd/sys/kern/subr_gtaskqueue.c
@@ -921,6 +921,24 @@ taskqgroup_bind(struct taskqgroup *qgroup)
}
}
+static void
+taskqgroup_config_init(void *arg)
+{
+ struct taskqgroup *qgroup = qgroup_config;
+ LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL);
+
+ LIST_SWAP(&gtask_head, &qgroup->tqg_queue[0].tgc_tasks,
+ grouptask, gt_list);
+ qgroup->tqg_queue[0].tgc_cnt = 0;
+ taskqgroup_cpu_create(qgroup, 0, 0);
+
+ qgroup->tqg_cnt = 1;
+ qgroup->tqg_stride = 1;
+}
+
+SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND,
+ taskqgroup_config_init, NULL);
+
static int
_taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride)
{
diff --git a/freebsd/sys/kern/subr_hints.c b/freebsd/sys/kern/subr_hints.c
index 982059c3..78ab1b4e 100644
--- a/freebsd/sys/kern/subr_hints.c
+++ b/freebsd/sys/kern/subr_hints.c
@@ -33,228 +33,257 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/lock.h>
+#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/sysctl.h>
#include <sys/systm.h>
#include <sys/bus.h>
-/*
- * Access functions for device resources.
- */
-
#ifndef __rtems__
-static int checkmethod = 1;
-static int use_kenv;
-static char *hintp;
-#else /* __rtems__ */
-#define hintmode 1
-#define hintp static_hints
-#define use_kenv 0
-static char __used default_static_hints[] = "";
-__weak_reference(default_static_hints, static_hints);
-#endif /* __rtems__ */
+#define FBACK_MDENV 0 /* MD env (e.g. loader.conf) */
+#define FBACK_STENV 1 /* Static env */
+#define FBACK_STATIC 2 /* static_hints */
-#ifndef __rtems__
/*
- * Define kern.hintmode sysctl, which only accept value 2, that cause to
- * switch from Static KENV mode to Dynamic KENV. So systems that have hints
- * compiled into kernel will be able to see/modify KENV (and hints too).
+ * We'll use hintenv_merged to indicate that the dynamic environment has been
+ * properly prepared for hint usage. This implies that the dynamic environment
+ * has already been setup (dynamic_kenv) and that we have added any supplied
+ * static_hints to the dynamic environment.
+ */
+static bool hintenv_merged;
+/* Static environment and static hints cannot change, so we'll skip known bad */
+static bool stenv_skip;
+static bool sthints_skip;
+/*
+ * Access functions for device resources.
*/
-static int
-sysctl_hintmode(SYSCTL_HANDLER_ARGS)
+static void
+static_hints_to_env(void *data __unused)
{
const char *cp;
char *line, *eq;
- int eqidx, error, from_kenv, i, value;
-
- from_kenv = 0;
- cp = kern_envp;
- value = hintmode;
-
- /* Fetch candidate for new hintmode value */
- error = sysctl_handle_int(oidp, &value, 0, req);
- if (error || req->newptr == NULL)
- return (error);
-
- if (value != 2)
- /* Only accept swithing to hintmode 2 */
- return (EINVAL);
-
- /* Migrate from static to dynamic hints */
- switch (hintmode) {
- case 0:
- if (dynamic_kenv) {
- /*
- * Already here. But assign hintmode to 2, to not
- * check it in the future.
- */
- hintmode = 2;
- return (0);
- }
- from_kenv = 1;
- cp = kern_envp;
- break;
- case 1:
- cp = static_hints;
- break;
- case 2:
- /* Nothing to do, hintmode already 2 */
- return (0);
- }
+ int eqidx, i;
- while (cp) {
- i = strlen(cp);
- if (i == 0)
- break;
- if (from_kenv) {
- if (strncmp(cp, "hint.", 5) != 0)
- /* kenv can have not only hints */
- continue;
- }
+ cp = static_hints;
+ while (cp && *cp != '\0') {
eq = strchr(cp, '=');
if (eq == NULL)
/* Bad hint value */
continue;
eqidx = eq - cp;
- line = malloc(i+1, M_TEMP, M_WAITOK);
+ i = strlen(cp);
+ line = malloc(i + 1, M_TEMP, M_WAITOK);
strcpy(line, cp);
- line[eqidx] = '\0';
- kern_setenv(line, line + eqidx + 1);
+ line[eqidx] = line[i] = '\0';
+ /*
+ * Before adding a hint to the dynamic environment, check if
+ * another value for said hint has already been added. This is
+ * needed because static environment overrides static hints and
+ * dynamic environment overrides all.
+ */
+ if (testenv(line) == 0)
+ kern_setenv(line, line + eqidx + 1);
free(line, M_TEMP);
cp += i + 1;
}
-
- hintmode = value;
- use_kenv = 1;
- return (0);
+ hintenv_merged = true;
}
-SYSCTL_PROC(_kern, OID_AUTO, hintmode, CTLTYPE_INT|CTLFLAG_RW,
- &hintmode, 0, sysctl_hintmode, "I", "Get/set current hintmode");
+/* Any time after dynamic env is setup */
+SYSINIT(hintenv, SI_SUB_KMEM + 1, SI_ORDER_SECOND, static_hints_to_env, NULL);
+#else /* __rtems__ */
+#define sthints_skip false
+
+static char __used default_static_hints[] = "";
+__weak_reference(default_static_hints, static_hints);
#endif /* __rtems__ */
/*
+ * Checks the environment to see if we even have any hints. If it has no hints,
+ * then res_find can take the hint that there's no point in searching it and
+ * either move on to the next environment or fail early.
+ */
+static bool
+_res_checkenv(char *envp)
+{
+ char *cp;
+
+ cp = envp;
+ while (cp) {
+ if (strncmp(cp, "hint.", 5) == 0)
+ return (true);
+ while (*cp != '\0')
+ cp++;
+ cp++;
+ if (*cp == '\0')
+ break;
+ }
+ return (false);
+}
+
+/*
* Evil wildcarding resource string lookup.
* This walks the supplied env string table and returns a match.
* The start point can be remembered for incremental searches.
*/
static int
-res_find(int *line, int *startln,
+res_find(char **hintp_cookie, int *line, int *startln,
const char *name, int *unit, const char *resname, const char *value,
const char **ret_name, int *ret_namelen, int *ret_unit,
const char **ret_resname, int *ret_resnamelen, const char **ret_value)
{
- int n = 0, hit, i = 0;
+#ifndef __rtems__
+ int fbacklvl = FBACK_MDENV, i = 0, n = 0;
+#else /* __rtems__ */
+ int n = 0;
+#endif /* __rtems__ */
char r_name[32];
int r_unit;
char r_resname[32];
char r_value[128];
const char *s, *cp;
- char *p;
-
+ char *hintp, *p;
#ifndef __rtems__
- if (checkmethod) {
- hintp = NULL;
+ bool dyn_used = false;
- switch (hintmode) {
- case 0: /* loader hints in environment only */
- break;
- case 1: /* static hints only */
- hintp = static_hints;
- checkmethod = 0;
- break;
- case 2: /* fallback mode */
- if (dynamic_kenv) {
- mtx_lock(&kenv_lock);
- cp = kenvp[0];
- for (i = 0; cp != NULL; cp = kenvp[++i]) {
- if (!strncmp(cp, "hint.", 5)) {
- use_kenv = 1;
- checkmethod = 0;
- break;
- }
+
+ /*
+ * We are expecting that the caller will pass us a hintp_cookie that
+ * they are tracking. Upon entry, if *hintp_cookie is *not* set, this
+ * indicates to us that we should be figuring out based on the current
+ * environment where to search. This keeps us sane throughout the
+ * entirety of a single search.
+ */
+ if (*hintp_cookie == NULL) {
+ hintp = NULL;
+ if (hintenv_merged) {
+ /*
+ * static_hints, if it was previously used, has
+ * already been folded in to the environment
+ * by this point.
+ */
+ mtx_lock(&kenv_lock);
+ cp = kenvp[0];
+ for (i = 0; cp != NULL; cp = kenvp[++i]) {
+ if (!strncmp(cp, "hint.", 5)) {
+ hintp = kenvp[0];
+ break;
}
- mtx_unlock(&kenv_lock);
- } else {
- cp = kern_envp;
- while (cp) {
- if (strncmp(cp, "hint.", 5) == 0) {
- cp = NULL;
- hintp = kern_envp;
- break;
- }
- while (*cp != '\0')
- cp++;
- cp++;
- if (*cp == '\0') {
- cp = NULL;
- hintp = static_hints;
- break;
- }
+ }
+ mtx_unlock(&kenv_lock);
+ dyn_used = true;
+ } else {
+ /*
+ * We'll have a chance to keep coming back here until
+ * we've actually exhausted all of our possibilities.
+ * We might have chosen the MD/Static env because it
+ * had some kind of hints, but perhaps it didn't have
+ * the hint we are looking for. We don't provide any
+ * fallback when searching the dynamic environment.
+ */
+fallback:
+ if (dyn_used || fbacklvl >= FBACK_STATIC)
+ return (ENOENT);
+
+ switch (fbacklvl) {
+ case FBACK_MDENV:
+ fbacklvl++;
+ if (_res_checkenv(md_envp)) {
+ hintp = md_envp;
+ break;
}
+
+ /* FALLTHROUGH */
+ case FBACK_STENV:
+ fbacklvl++;
+ if (!stenv_skip && _res_checkenv(kern_envp)) {
+ hintp = kern_envp;
+ break;
+ } else
+ stenv_skip = true;
+
+ /* FALLTHROUGH */
+ case FBACK_STATIC:
+ fbacklvl++;
+#else /* __rtems__ */
+ hintp = NULL;
+#endif /* __rtems__ */
+ /* We'll fallback to static_hints if needed/can */
+ if (!sthints_skip &&
+ _res_checkenv(static_hints))
+ hintp = static_hints;
+#ifndef __rtems__
+ else
+ sthints_skip = true;
+
+ break;
+ default:
+ return (ENOENT);
}
- break;
- default:
- break;
- }
- if (hintp == NULL) {
- if (dynamic_kenv) {
- use_kenv = 1;
- checkmethod = 0;
- } else
- hintp = kern_envp;
}
+#endif /* __rtems__ */
+
+ if (hintp == NULL)
+ return (ENOENT);
+ *hintp_cookie = hintp;
+#ifndef __rtems__
+ } else {
+ hintp = *hintp_cookie;
+ if (hintenv_merged && hintp == kenvp[0])
+ dyn_used = true;
+ else
+ /*
+ * If we aren't using the dynamic environment, we need
+ * to run through the proper fallback procedure again.
+ * This is so that we do continuations right if we're
+ * working with *line and *startln.
+ */
+ goto fallback;
}
- if (use_kenv) {
+ if (dyn_used) {
mtx_lock(&kenv_lock);
i = 0;
- cp = kenvp[0];
- if (cp == NULL) {
- mtx_unlock(&kenv_lock);
- return (ENOENT);
- }
- } else
+ }
#endif /* __rtems__ */
- cp = hintp;
+
+ cp = hintp;
while (cp) {
- hit = 1;
(*line)++;
if (strncmp(cp, "hint.", 5) != 0)
- hit = 0;
- else
- n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s",
- r_name, &r_unit, r_resname, r_value);
- if (hit && n != 4) {
+ goto nexthint;
+ n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s", r_name, &r_unit,
+ r_resname, r_value);
+ if (n != 4) {
printf("CONFIG: invalid hint '%s'\n", cp);
p = strchr(cp, 'h');
*p = 'H';
- hit = 0;
+ goto nexthint;
}
- if (hit && startln && *startln >= 0 && *line < *startln)
- hit = 0;
- if (hit && name && strcmp(name, r_name) != 0)
- hit = 0;
- if (hit && unit && *unit != r_unit)
- hit = 0;
- if (hit && resname && strcmp(resname, r_resname) != 0)
- hit = 0;
- if (hit && value && strcmp(value, r_value) != 0)
- hit = 0;
- if (hit)
- break;
- if (use_kenv) {
+ if (startln && *startln >= 0 && *line < *startln)
+ goto nexthint;
+ if (name && strcmp(name, r_name) != 0)
+ goto nexthint;
+ if (unit && *unit != r_unit)
+ goto nexthint;
+ if (resname && strcmp(resname, r_resname) != 0)
+ goto nexthint;
+ if (value && strcmp(value, r_value) != 0)
+ goto nexthint;
+ /* Successfully found a hint matching all criteria */
+ break;
+nexthint:
#ifndef __rtems__
+ if (dyn_used) {
cp = kenvp[++i];
if (cp == NULL)
break;
-#else /* __rtems__ */
(void) i;
-#endif /* __rtems__ */
} else {
+#endif /* __rtems__ */
while (*cp != '\0')
cp++;
cp++;
@@ -262,14 +291,20 @@ res_find(int *line, int *startln,
cp = NULL;
break;
}
+#ifndef __rtems__
}
+#endif /* __rtems__ */
}
#ifndef __rtems__
- if (use_kenv)
+ if (dyn_used)
mtx_unlock(&kenv_lock);
#endif /* __rtems__ */
if (cp == NULL)
- return ENOENT;
+#ifndef __rtems__
+ goto fallback;
+#else /* __rtems__ */
+ return (ENOENT);
+#endif /* __rtems__ */
s = cp;
/* This is a bit of a hack, but at least is reentrant */
@@ -307,11 +342,13 @@ resource_find(int *line, int *startln,
{
int i;
int un;
+ char *hintp;
*line = 0;
+ hintp = NULL;
/* Search for exact unit matches first */
- i = res_find(line, startln, name, unit, resname, value,
+ i = res_find(&hintp, line, startln, name, unit, resname, value,
ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen,
ret_value);
if (i == 0)
@@ -320,7 +357,7 @@ resource_find(int *line, int *startln,
return ENOENT;
/* If we are still here, search for wildcard matches */
un = -1;
- i = res_find(line, startln, name, &un, resname, value,
+ i = res_find(&hintp, line, startln, name, &un, resname, value,
ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen,
ret_value);
if (i == 0)
diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c
index d8d42653..21b2754c 100644
--- a/freebsd/sys/kern/subr_module.c
+++ b/freebsd/sys/kern/subr_module.c
@@ -35,6 +35,9 @@ __FBSDID("$FreeBSD$");
#include <sys/systm.h>
#include <sys/linker.h>
+#include <vm/vm.h>
+#include <vm/vm_extern.h>
+
/*
* Preloaded module support
*/
@@ -206,29 +209,42 @@ preload_search_info(caddr_t mod, int inf)
void
preload_delete_name(const char *name)
{
- caddr_t curp;
- uint32_t *hdr;
+ caddr_t addr, curp;
+ uint32_t *hdr, sz;
int next;
int clearing;
+
+ addr = 0;
+ sz = 0;
if (preload_metadata != NULL) {
-
+
clearing = 0;
curp = preload_metadata;
for (;;) {
hdr = (uint32_t *)curp;
- if (hdr[0] == 0 && hdr[1] == 0)
- break;
-
- /* Search for a MODINFO_NAME field */
- if (hdr[0] == MODINFO_NAME) {
+ if (hdr[0] == MODINFO_NAME || (hdr[0] == 0 && hdr[1] == 0)) {
+ /* Free memory used to store the file. */
+ if (addr != 0 && sz != 0)
+ kmem_bootstrap_free((vm_offset_t)addr, sz);
+ addr = 0;
+ sz = 0;
+
+ if (hdr[0] == 0)
+ break;
if (!strcmp(name, curp + sizeof(uint32_t) * 2))
clearing = 1; /* got it, start clearing */
- else if (clearing)
+ else if (clearing) {
clearing = 0; /* at next one now.. better stop */
+ }
}
- if (clearing)
+ if (clearing) {
+ if (hdr[0] == MODINFO_ADDR)
+ addr = *(caddr_t *)(curp + sizeof(uint32_t) * 2);
+ else if (hdr[0] == MODINFO_SIZE)
+ sz = *(uint32_t *)(curp + sizeof(uint32_t) * 2);
hdr[0] = MODINFO_EMPTY;
+ }
/* skip to next field */
next = sizeof(uint32_t) * 2 + hdr[1];
diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c
index 1b866e3a..0ab77996 100644
--- a/freebsd/sys/kern/subr_pcpu.c
+++ b/freebsd/sys/kern/subr_pcpu.c
@@ -75,7 +75,7 @@ struct dpcpu_free {
TAILQ_ENTRY(dpcpu_free) df_link;
};
-static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]);
+DPCPU_DEFINE_STATIC(char, modspace[DPCPU_MODMIN] __aligned(__alignof(void *)));
static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head);
static struct sx dpcpu_lock;
uintptr_t dpcpu_off[MAXCPU];
diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c
index 4c45bcfe..6e719897 100644
--- a/freebsd/sys/kern/subr_prf.c
+++ b/freebsd/sys/kern/subr_prf.c
@@ -135,10 +135,22 @@ static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper);
static void snprintf_func(int ch, void *arg);
#ifndef __rtems__
-static int msgbufmapped; /* Set when safe to use msgbuf */
+static bool msgbufmapped; /* Set when safe to use msgbuf */
int msgbuftrigger;
struct msgbuf *msgbufp;
+#ifndef BOOT_TAG_SZ
+#define BOOT_TAG_SZ 32
+#endif
+#ifndef BOOT_TAG
+/* Tag used to mark the start of a boot in dmesg */
+#define BOOT_TAG "---<<BOOT>>---"
+#endif
+
+static char current_boot_tag[BOOT_TAG_SZ + 1] = BOOT_TAG;
+SYSCTL_STRING(_kern, OID_AUTO, boot_tag, CTLFLAG_RDTUN | CTLFLAG_NOFETCH,
+ current_boot_tag, 0, "Tag added to dmesg at start of boot");
+
static int log_console_output = 1;
SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RWTUN,
&log_console_output, 0, "Duplicate console output to the syslog");
@@ -743,6 +755,7 @@ reswitch: switch (ch = (u_char)*fmt++) {
padc = '0';
goto reswitch;
}
+ /* FALLTHROUGH */
case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
for (n = 0;; ++fmt) {
@@ -1057,14 +1070,22 @@ msgbufinit(void *ptr, int size)
{
char *cp;
static struct msgbuf *oldp = NULL;
+ bool print_boot_tag;
size -= sizeof(*msgbufp);
cp = (char *)ptr;
+ print_boot_tag = !msgbufmapped;
+ /* Attempt to fetch kern.boot_tag tunable on first mapping */
+ if (!msgbufmapped)
+ TUNABLE_STR_FETCH("kern.boot_tag", current_boot_tag,
+ sizeof(current_boot_tag));
msgbufp = (struct msgbuf *)(cp + size);
msgbuf_reinit(msgbufp, cp, size);
if (msgbufmapped && oldp != msgbufp)
msgbuf_copy(oldp, msgbufp);
- msgbufmapped = 1;
+ msgbufmapped = true;
+ if (print_boot_tag && *current_boot_tag != '\0')
+ printf("%s\n", current_boot_tag);
oldp = msgbufp;
}
diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c
index e527495a..8eb0aad9 100755
--- a/freebsd/sys/kern/sys_pipe.c
+++ b/freebsd/sys/kern/sys_pipe.c
@@ -572,9 +572,7 @@ pipe(int fildes[2])
* If it fails it will return ENOMEM.
*/
static int
-pipespace_new(cpipe, size)
- struct pipe *cpipe;
- int size;
+pipespace_new(struct pipe *cpipe, int size)
{
caddr_t buffer;
int error, cnt, firstseg;
@@ -646,9 +644,7 @@ retry:
* Wrapper for pipespace_new() that performs locking assertions.
*/
static int
-pipespace(cpipe, size)
- struct pipe *cpipe;
- int size;
+pipespace(struct pipe *cpipe, int size)
{
KASSERT(cpipe->pipe_state & PIPE_LOCKFL,
@@ -660,9 +656,7 @@ pipespace(cpipe, size)
* lock a pipe for I/O, blocking other access
*/
static __inline int
-pipelock(cpipe, catch)
- struct pipe *cpipe;
- int catch;
+pipelock(struct pipe *cpipe, int catch)
{
int error;
@@ -683,8 +677,7 @@ pipelock(cpipe, catch)
* unlock a pipe I/O lock
*/
static __inline void
-pipeunlock(cpipe)
- struct pipe *cpipe;
+pipeunlock(struct pipe *cpipe)
{
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
@@ -698,8 +691,7 @@ pipeunlock(cpipe)
}
void
-pipeselwakeup(cpipe)
- struct pipe *cpipe;
+pipeselwakeup(struct pipe *cpipe)
{
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
@@ -720,9 +712,7 @@ pipeselwakeup(cpipe)
* will start out zero'd from the ctor, so we just manage the kmem.
*/
static void
-pipe_create(pipe, backing)
- struct pipe *pipe;
- int backing;
+pipe_create(struct pipe *pipe, int backing)
{
if (backing) {
@@ -744,12 +734,8 @@ pipe_create(pipe, backing)
/* ARGSUSED */
static int
-pipe_read(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- struct thread *td;
- int flags;
+pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
struct pipe *rpipe;
int error;
@@ -995,9 +981,7 @@ rtems_bsd_pipe_readv(rtems_libio_t *iop, const struct iovec *iov,
* This is similar to a physical write operation.
*/
static int
-pipe_build_write_buffer(wpipe, uio)
- struct pipe *wpipe;
- struct uio *uio;
+pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio)
{
u_int size;
int i;
@@ -1041,8 +1025,7 @@ pipe_build_write_buffer(wpipe, uio)
* unmap and unwire the process buffer
*/
static void
-pipe_destroy_write_buffer(wpipe)
- struct pipe *wpipe;
+pipe_destroy_write_buffer(struct pipe *wpipe)
{
PIPE_LOCK_ASSERT(wpipe, MA_OWNED);
@@ -1056,8 +1039,7 @@ pipe_destroy_write_buffer(wpipe)
* pages can be freed without loss of data.
*/
static void
-pipe_clone_write_buffer(wpipe)
- struct pipe *wpipe;
+pipe_clone_write_buffer(struct pipe *wpipe)
{
struct uio uio;
struct iovec iov;
@@ -1096,9 +1078,7 @@ pipe_clone_write_buffer(wpipe)
* the pipe buffer. Then the direct mapping write is set-up.
*/
static int
-pipe_direct_write(wpipe, uio)
- struct pipe *wpipe;
- struct uio *uio;
+pipe_direct_write(struct pipe *wpipe, struct uio *uio)
{
int error;
@@ -1197,12 +1177,8 @@ error1:
#endif
static int
-pipe_write(fp, uio, active_cred, flags, td)
- struct file *fp;
- struct uio *uio;
- struct ucred *active_cred;
- struct thread *td;
- int flags;
+pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred,
+ int flags, struct thread *td)
{
int error = 0;
int desiredsize;
@@ -1553,11 +1529,8 @@ rtems_bsd_pipe_writev(rtems_libio_t *iop, const struct iovec *iov,
/* ARGSUSED */
#ifndef __rtems__
static int
-pipe_truncate(fp, length, active_cred, td)
- struct file *fp;
- off_t length;
- struct ucred *active_cred;
- struct thread *td;
+pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *cpipe;
int error;
@@ -1575,12 +1548,8 @@ pipe_truncate(fp, length, active_cred, td)
* we implement a very minimal set of ioctls for compatibility with sockets.
*/
static int
-pipe_ioctl(fp, cmd, data, active_cred, td)
- struct file *fp;
- u_long cmd;
- void *data;
- struct ucred *active_cred;
- struct thread *td;
+pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *mpipe = fp->f_data;
int error;
@@ -1672,11 +1641,8 @@ rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer)
#endif /* __rtems__ */
static int
-pipe_poll(fp, events, active_cred, td)
- struct file *fp;
- int events;
- struct ucred *active_cred;
- struct thread *td;
+pipe_poll(struct file *fp, int events, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *rpipe;
struct pipe *wpipe;
@@ -1786,11 +1752,8 @@ rtems_bsd_pipe_poll(rtems_libio_t *iop, int events)
*/
#ifndef __rtems__
static int
-pipe_stat(fp, ub, active_cred, td)
- struct file *fp;
- struct stat *ub;
- struct ucred *active_cred;
- struct thread *td;
+pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *pipe;
#else /* __rtems__ */
@@ -1889,9 +1852,7 @@ rtems_bsd_pipe_stat(
/* ARGSUSED */
static int
-pipe_close(fp, td)
- struct file *fp;
- struct thread *td;
+pipe_close(struct file *fp, struct thread *td)
{
#ifndef __rtems__
@@ -1922,12 +1883,8 @@ pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct threa
}
static int
-pipe_chown(fp, uid, gid, active_cred, td)
- struct file *fp;
- uid_t uid;
- gid_t gid;
- struct ucred *active_cred;
- struct thread *td;
+pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred,
+ struct thread *td)
{
struct pipe *cpipe;
int error;
@@ -1957,8 +1914,7 @@ pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp)
#endif /* __rtems__ */
static void
-pipe_free_kmem(cpipe)
- struct pipe *cpipe;
+pipe_free_kmem(struct pipe *cpipe)
{
KASSERT(!mtx_owned(PIPE_MTX(cpipe)),
@@ -1988,8 +1944,7 @@ pipe_free_kmem(cpipe)
* shutdown the pipe
*/
static void
-pipeclose(cpipe)
- struct pipe *cpipe;
+pipeclose(struct pipe *cpipe)
{
struct pipepair *pp;
struct pipe *ppipe;
diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c
index ec493c04..cf99c615 100644
--- a/freebsd/sys/kern/uipc_sockbuf.c
+++ b/freebsd/sys/kern/uipc_sockbuf.c
@@ -961,23 +961,14 @@ sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa,
return (retval);
}
-int
+void
sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
struct mbuf *control)
{
- struct mbuf *m, *n, *mlast;
- int space;
-
- SOCKBUF_LOCK_ASSERT(sb);
+ struct mbuf *m, *mlast;
- if (control == NULL)
- panic("sbappendcontrol_locked");
- space = m_length(control, &n) + m_length(m0, NULL);
-
- if (space > sbspace(sb))
- return (0);
m_clrprotoflags(m0);
- n->m_next = m0; /* concatenate data to control */
+ m_last(control)->m_next = m0;
SBLASTRECORDCHK(sb);
@@ -991,18 +982,15 @@ sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0,
SBLASTMBUFCHK(sb);
SBLASTRECORDCHK(sb);
- return (1);
}
-int
+void
sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control)
{
- int retval;
SOCKBUF_LOCK(sb);
- retval = sbappendcontrol_locked(sb, m0, control);
+ sbappendcontrol_locked(sb, m0, control);
SOCKBUF_UNLOCK(sb);
- return (retval);
}
/*
@@ -1289,6 +1277,63 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff)
return (ret);
}
+struct mbuf *
+#ifndef __rtems__
+sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
+#else /* __rtems__ */
+sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff)
+#endif /* __rtems__ */
+{
+ struct mbuf *m;
+
+ KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+ if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
+ *moff = off;
+ if (sb->sb_sndptr == NULL) {
+ sb->sb_sndptr = sb->sb_mb;
+ sb->sb_sndptroff = 0;
+ }
+ return (sb->sb_mb);
+ } else {
+ m = sb->sb_sndptr;
+ off -= sb->sb_sndptroff;
+ }
+ *moff = off;
+ return (m);
+}
+
+void
+#ifndef __rtems__
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
+#else /* __rtems__ */
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len)
+#endif /* __rtems__ */
+{
+ /*
+ * A small copy was done, advance forward the sb_sbsndptr to cover
+ * it.
+ */
+ struct mbuf *m;
+
+ if (mb != sb->sb_sndptr) {
+ /* Did not copyout at the same mbuf */
+ return;
+ }
+ m = mb;
+ while (m && (len > 0)) {
+ if (len >= m->m_len) {
+ len -= m->m_len;
+ if (m->m_next) {
+ sb->sb_sndptroff += m->m_len;
+ sb->sb_sndptr = m->m_next;
+ }
+ m = m->m_next;
+ } else {
+ len = 0;
+ }
+ }
+}
+
/*
* Return the first mbuf and the mbuf data offset for the provided
* send offset without changing the "sb_sndptroff" field.
diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c
index e82642e4..3143a392 100644
--- a/freebsd/sys/kern/uipc_socket.c
+++ b/freebsd/sys/kern/uipc_socket.c
@@ -1126,6 +1126,8 @@ soclose(struct socket *so)
drop:
if (so->so_proto->pr_usrreqs->pru_close != NULL)
(*so->so_proto->pr_usrreqs->pru_close)(so);
+ if (so->so_dtor != NULL)
+ so->so_dtor(so);
SOCK_LOCK(so);
if ((listening = (so->so_options & SO_ACCEPTCONN))) {
@@ -2191,7 +2193,6 @@ release:
/*
* Optimized version of soreceive() for stream (TCP) sockets.
- * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled.
*/
int
soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
@@ -2206,12 +2207,12 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio,
return (EINVAL);
if (psa != NULL)
*psa = NULL;
- if (controlp != NULL)
- return (EINVAL);
if (flagsp != NULL)
flags = *flagsp &~ MSG_EOR;
else
flags = 0;
+ if (controlp != NULL)
+ *controlp = NULL;
if (flags & MSG_OOB)
return (soreceive_rcvoob(so, uio, flags));
if (mp0 != NULL)
@@ -2815,6 +2816,7 @@ sosetopt(struct socket *so, struct sockopt *sopt)
case SO_BROADCAST:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_OOBINLINE:
case SO_TIMESTAMP:
case SO_BINTIME:
@@ -3035,6 +3037,7 @@ sogetopt(struct socket *so, struct sockopt *sopt)
case SO_KEEPALIVE:
case SO_REUSEADDR:
case SO_REUSEPORT:
+ case SO_REUSEPORT_LB:
case SO_BROADCAST:
case SO_OOBINLINE:
case SO_ACCEPTCONN:
@@ -3046,6 +3049,10 @@ integer:
error = sooptcopyout(sopt, &optval, sizeof optval);
break;
+ case SO_DOMAIN:
+ optval = so->so_proto->pr_domain->dom_family;
+ goto integer;
+
case SO_TYPE:
optval = so->so_type;
goto integer;
@@ -3867,6 +3874,17 @@ sodupsockaddr(const struct sockaddr *sa, int mflags)
}
/*
+ * Register per-socket destructor.
+ */
+void
+sodtor_set(struct socket *so, so_dtor_t *func)
+{
+
+ SOCK_LOCK_ASSERT(so);
+ so->so_dtor = func;
+}
+
+/*
* Register per-socket buffer upcalls.
*/
void
@@ -4027,12 +4045,12 @@ sotoxsocket(struct socket *so, struct xsocket *xso)
{
xso->xso_len = sizeof *xso;
- xso->xso_so = so;
+ xso->xso_so = (uintptr_t)so;
xso->so_type = so->so_type;
xso->so_options = so->so_options;
xso->so_linger = so->so_linger;
xso->so_state = so->so_state;
- xso->so_pcb = so->so_pcb;
+ xso->so_pcb = (uintptr_t)so->so_pcb;
xso->xso_protocol = so->so_proto->pr_protocol;
xso->xso_family = so->so_proto->pr_domain->dom_family;
xso->so_timeo = so->so_timeo;
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c
index 0872aa62..9c4c52e4 100644
--- a/freebsd/sys/kern/uipc_syscalls.c
+++ b/freebsd/sys/kern/uipc_syscalls.c
@@ -60,6 +60,8 @@ __FBSDID("$FreeBSD$");
#include <sys/socketvar.h>
#include <sys/syscallsubr.h>
#include <sys/uio.h>
+#include <sys/un.h>
+#include <sys/unpcb.h>
#ifdef KTRACE
#include <sys/ktrace.h>
#endif
@@ -831,6 +833,15 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol,
error = soconnect2(so2, so1);
if (error != 0)
goto free4;
+ } else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) {
+ struct unpcb *unp, *unp2;
+ unp = sotounpcb(so1);
+ unp2 = sotounpcb(so2);
+ /*
+ * No need to lock the unps, because the sockets are brand-new.
+ * No other threads can be using them yet
+ */
+ unp_copy_peercred(td, unp, unp2, unp);
}
finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data,
&socketops);
@@ -1260,7 +1271,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
{
struct uio auio;
struct iovec *iov;
- struct mbuf *m, *control = NULL;
+ struct mbuf *control, *m;
caddr_t ctlbuf;
struct file *fp;
struct socket *so;
@@ -1307,6 +1318,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
if (KTRPOINT(td, KTR_GENIO))
ktruio = cloneuio(&auio);
#endif
+ control = NULL;
len = auio.uio_resid;
error = soreceive(so, &fromsa, &auio, NULL,
(mp->msg_control || controlp) ? &control : NULL,
@@ -1370,30 +1382,22 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg,
control->m_data += sizeof (struct cmsghdr);
}
#endif
+ ctlbuf = mp->msg_control;
len = mp->msg_controllen;
- m = control;
mp->msg_controllen = 0;
- ctlbuf = mp->msg_control;
-
- while (m && len > 0) {
- unsigned int tocopy;
-
- if (len >= m->m_len)
- tocopy = m->m_len;
- else {
- mp->msg_flags |= MSG_CTRUNC;
- tocopy = len;
- }
-
- if ((error = copyout(mtod(m, caddr_t),
- ctlbuf, tocopy)) != 0)
+ for (m = control; m != NULL && len >= m->m_len; m = m->m_next) {
+ if ((error = copyout(mtod(m, caddr_t), ctlbuf,
+ m->m_len)) != 0)
goto out;
- ctlbuf += tocopy;
- len -= tocopy;
- m = m->m_next;
+ ctlbuf += m->m_len;
+ len -= m->m_len;
+ mp->msg_controllen += m->m_len;
+ }
+ if (m != NULL) {
+ mp->msg_flags |= MSG_CTRUNC;
+ m_dispose_extcontrolm(m);
}
- mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
}
out:
fdrop(fp, td);
@@ -1405,8 +1409,11 @@ out:
if (error == 0 && controlp != NULL)
*controlp = control;
- else if (control)
+ else if (control != NULL) {
+ if (error != 0)
+ m_dispose_extcontrolm(control);
m_freem(control);
+ }
return (error);
}
@@ -2134,3 +2141,51 @@ getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len)
return (0);
#endif /* __rtems__ */
}
+
+/*
+ * Dispose of externalized rights from an SCM_RIGHTS message.  This function
+ * should be used in error or truncation cases to avoid leaking file descriptors
+ * into the recipient's (the current thread's) table.
+ *
+ * Walks the mbuf chain starting at 'm'.  For every MT_EXTCONTROL mbuf, each
+ * descriptor carried in a SOL_SOCKET/SCM_RIGHTS control message is closed via
+ * fdclose() on behalf of curthread.  A processed mbuf is retyped to
+ * MT_CONTROL, so a second call on the same chain skips it.  Panics if an mbuf
+ * is shorter than the control messages it claims to hold.
+ */
+void
+m_dispose_extcontrolm(struct mbuf *m)
+{
+	struct cmsghdr *cm;
+	struct file *fp;
+	struct thread *td;
+	socklen_t clen, datalen;
+	int error, fd, *fds, nfd;
+
+	td = curthread;
+	for (; m != NULL; m = m->m_next) {
+		/* Only mbufs marked as externalized are processed. */
+		if (m->m_type != MT_EXTCONTROL)
+			continue;
+		cm = mtod(m, struct cmsghdr *);
+		clen = m->m_len;
+		while (clen > 0) {
+			if (clen < sizeof(*cm))
+				panic("%s: truncated mbuf %p", __func__, m);
+			/* Aligned size of this message, header included. */
+			datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0));
+			if (clen < datalen)
+				panic("%s: truncated mbuf %p", __func__, m);
+
+			if (cm->cmsg_level == SOL_SOCKET &&
+			    cm->cmsg_type == SCM_RIGHTS) {
+				fds = (int *)CMSG_DATA(cm);
+				nfd = (cm->cmsg_len - CMSG_SPACE(0)) /
+				    sizeof(int);
+
+				while (nfd-- > 0) {
+					fd = *fds++;
+					error = fget(td, fd, &cap_no_rights,
+					    &fp);
+					if (error == 0) {
+						fdclose(td, fp, fd);
+						/*
+						 * fget() handed back a
+						 * referenced file; release
+						 * that reference, otherwise
+						 * the file is never freed.
+						 */
+						fdrop(fp, td);
+					}
+				}
+			}
+			clen -= datalen;
+			cm = (struct cmsghdr *)((uint8_t *)cm + datalen);
+		}
+		m_chtype(m, MT_CONTROL);
+	}
+}
diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c
index 688682d4..c1885ed6 100644
--- a/freebsd/sys/kern/uipc_usrreq.c
+++ b/freebsd/sys/kern/uipc_usrreq.c
@@ -376,33 +376,32 @@ unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2)
}
static __noinline void
-unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, int *freed)
-
+unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p,
+ int *freed)
{
struct unpcb *unp2;
unp2 = *unp2p;
- unp_pcb_hold((unp2));
- UNP_PCB_UNLOCK((unp));
- UNP_PCB_LOCK((unp2));
- UNP_PCB_LOCK((unp));
- *freed = unp_pcb_rele((unp2));
+ unp_pcb_hold(unp2); /* paired with the unp_pcb_rele() below */
+ UNP_PCB_UNLOCK(unp); /* give up unp so that unp2 can be locked first */
+ UNP_PCB_LOCK(unp2);
+ UNP_PCB_LOCK(unp); /* both PCBs are locked again from here on */
+ *freed = unp_pcb_rele(unp2); /* nonzero: caller's unp2 pointer is cleared below */
if (*freed)
*unp2p = NULL;
}
-#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
- freed = 0; \
- UNP_PCB_LOCK_ASSERT((unp)); \
- UNP_PCB_UNLOCK_ASSERT((unp2)); \
- MPASS(unp != unp2); \
- if (__predict_true(UNP_PCB_TRYLOCK((unp2)))) \
- break; \
- else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \
- UNP_PCB_LOCK((unp2)); \
- else { \
- unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
- } \
+#define unp_pcb_owned_lock2(unp, unp2, freed) do { \
+ freed = 0; /* only the slow path below can set it */ \
+ UNP_PCB_LOCK_ASSERT(unp); \
+ UNP_PCB_UNLOCK_ASSERT(unp2); \
+ MPASS((unp) != (unp2)); \
+ if (__predict_true(UNP_PCB_TRYLOCK(unp2))) /* expected case: lock taken without blocking */ \
+ break; \
+ else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) /* unp2 at the higher address: lock it while holding unp */ \
+ UNP_PCB_LOCK(unp2); \
+ else /* otherwise unp is unlocked and both are re-locked, unp2 first */ \
+ unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \
} while (0)
@@ -992,21 +991,19 @@ uipc_disconnect(struct socket *so)
UNP_PCB_UNLOCK(unp);
return (0);
}
- if (unp == unp2) {
- if (unp_pcb_rele(unp) == 0)
+ if (__predict_true(unp != unp2)) {
+ unp_pcb_owned_lock2(unp, unp2, freed);
+ if (__predict_false(freed)) {
UNP_PCB_UNLOCK(unp);
+ return (0);
+ }
+ unp_pcb_hold(unp2);
}
- unp_pcb_owned_lock2(unp, unp2, freed);
- if (__predict_false(freed)) {
- UNP_PCB_UNLOCK(unp);
- return (0);
- }
- unp_pcb_hold(unp2);
unp_pcb_hold(unp);
unp_disconnect(unp, unp2);
if (unp_pcb_rele(unp) == 0)
UNP_PCB_UNLOCK(unp);
- if (unp_pcb_rele(unp2) == 0)
+ if ((unp != unp2) && unp_pcb_rele(unp2) == 0)
UNP_PCB_UNLOCK(unp2);
return (0);
}
@@ -1305,16 +1302,22 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
control = unp_addsockcred(td, control);
#endif /* __rtems__ */
}
+
/*
- * Send to paired receive port, and then reduce send buffer
- * hiwater marks to maintain backpressure. Wake up readers.
+ * Send to paired receive port and wake up readers. Don't
+ * check for space available in the receive buffer if we're
+ * attaching ancillary data; Unix domain sockets only check
+ * for space in the sending sockbuf, and that check is
+ * performed one level up the stack. At that level we cannot
+ * precisely account for the amount of buffer space used
+ * (e.g., because control messages are not yet internalized).
*/
switch (so->so_type) {
case SOCK_STREAM:
if (control != NULL) {
- if (sbappendcontrol_locked(&so2->so_rcv, m,
- control))
- control = NULL;
+ sbappendcontrol_locked(&so2->so_rcv, m,
+ control);
+ control = NULL;
} else
sbappend_locked(&so2->so_rcv, m, flags);
break;
@@ -1323,14 +1326,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
const struct sockaddr *from;
from = &sun_noname;
- /*
- * Don't check for space available in so2->so_rcv.
- * Unix domain sockets only check for space in the
- * sending sockbuf, and that check is performed one
- * level up the stack.
- */
if (sbappendaddr_nospacecheck_locked(&so2->so_rcv,
- from, m, control))
+ from, m, control))
control = NULL;
break;
}
@@ -1396,14 +1393,21 @@ uipc_ready(struct socket *so, struct mbuf *m, int count)
unp = sotounpcb(so);
- UNP_LINK_RLOCK();
+ UNP_PCB_LOCK(unp);
if ((unp2 = unp->unp_conn) == NULL) {
- UNP_LINK_RUNLOCK();
- for (int i = 0; i < count; i++)
- m = m_free(m);
- return (ECONNRESET);
+ UNP_PCB_UNLOCK(unp);
+ goto error;
+ }
+ if (unp != unp2) {
+ if (UNP_PCB_TRYLOCK(unp2) == 0) {
+ unp_pcb_hold(unp2);
+ UNP_PCB_UNLOCK(unp);
+ UNP_PCB_LOCK(unp2);
+ if (unp_pcb_rele(unp2))
+ goto error;
+ } else
+ UNP_PCB_UNLOCK(unp);
}
- UNP_PCB_LOCK(unp2);
so2 = unp2->unp_socket;
SOCKBUF_LOCK(&so2->so_rcv);
@@ -1413,9 +1417,12 @@ uipc_ready(struct socket *so, struct mbuf *m, int count)
SOCKBUF_UNLOCK(&so2->so_rcv);
UNP_PCB_UNLOCK(unp2);
- UNP_LINK_RUNLOCK();
return (error);
+ error:
+ for (int i = 0; i < count; i++)
+ m = m_free(m);
+ return (ECONNRESET);
}
static int
@@ -1778,24 +1785,8 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam,
sa = NULL;
}
- /*
- * The connector's (client's) credentials are copied from its
- * process structure at the time of connect() (which is now).
- */
- cru2x(td->td_ucred, &unp3->unp_peercred);
- unp3->unp_flags |= UNP_HAVEPC;
+ unp_copy_peercred(td, unp3, unp, unp2);
- /*
- * The receiver's (server's) credentials are copied from the
- * unp_peercred member of socket on which the former called
- * listen(); uipc_listen() cached that process's credentials
- * at that time so we can use them now.
- */
- memcpy(&unp->unp_peercred, &unp2->unp_peercred,
- sizeof(unp->unp_peercred));
- unp->unp_flags |= UNP_HAVEPC;
- if (unp2->unp_flags & UNP_WANTCRED)
- unp3->unp_flags |= UNP_WANTCRED;
UNP_PCB_UNLOCK(unp2);
unp2 = unp3;
unp_pcb_owned_lock2(unp2, unp, freed);
@@ -1838,6 +1829,27 @@ bad:
return (error);
}
+/*
+ * Fill in the peer credentials of two PCBs at connection time.
+ *
+ * client_unp receives the credentials of the connecting thread
+ * (td->td_ucred).  server_unp receives a copy of the credentials stored in
+ * listen_unp, the PCB of the socket on which the server called listen(2);
+ * uipc_listen cached that process's credentials at the time.  If
+ * UNP_WANTCRED is set on listen_unp it is also set on client_unp.
+ */
+void
+unp_copy_peercred(struct thread *td, struct unpcb *client_unp,
+    struct unpcb *server_unp, struct unpcb *listen_unp)
+{
+	/*
+	 * Fill in the client side before copying from listen_unp: a caller
+	 * may pass the same PCB for both.
+	 */
+	cru2x(td->td_ucred, &client_unp->unp_peercred);
+	memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred,
+	    sizeof(server_unp->unp_peercred));
+
+	/* Both ends now hold valid peer credentials. */
+	client_unp->unp_flags |= UNP_HAVEPC;
+	server_unp->unp_flags |= UNP_HAVEPC;
+	if ((listen_unp->unp_flags & UNP_WANTCRED) != 0) {
+		client_unp->unp_flags |= UNP_WANTCRED;
+	}
+}
+
static int
unp_connect2(struct socket *so, struct socket *so2, int req)
{
@@ -2026,7 +2038,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
if (freeunp == 0 && unp->unp_gencnt <= gencnt) {
xu->xu_len = sizeof *xu;
- xu->xu_unpp = unp;
+ xu->xu_unpp = (uintptr_t)unp;
/*
* XXX - need more locking here to protect against
* connect/disconnect races for SMP.
@@ -2043,10 +2055,10 @@ unp_pcblist(SYSCTL_HANDLER_ARGS)
unp->unp_conn->unp_addr->sun_len);
else
bzero(&xu->xu_caddr, sizeof(xu->xu_caddr));
- xu->unp_vnode = unp->unp_vnode;
- xu->unp_conn = unp->unp_conn;
- xu->xu_firstref = LIST_FIRST(&unp->unp_refs);
- xu->xu_nextref = LIST_NEXT(unp, unp_reflink);
+ xu->unp_vnode = (uintptr_t)unp->unp_vnode;
+ xu->unp_conn = (uintptr_t)unp->unp_conn;
+ xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs);
+ xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink);
xu->unp_gencnt = unp->unp_gencnt;
sotoxsocket(unp->unp_socket, &xu->xu_socket);
UNP_PCB_UNLOCK(unp);
@@ -2220,6 +2232,13 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags)
&fdep[i]->fde_caps);
unp_externalize_fp(fdep[i]->fde_file);
}
+
+ /*
+ * The new type indicates that the mbuf data refers to
+ * kernel resources that may need to be released before
+ * the mbuf is freed.
+ */
+ m_chtype(*controlp, MT_EXTCONTROL);
FILEDESC_XUNLOCK(fdesc);
free(fdep[0], M_FILECAPS);
} else {