diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-08-22 14:59:50 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-09-21 10:29:41 +0200 |
commit | 3489e3b6396ee9944a6a2e19e675ca54c36993b4 (patch) | |
tree | cd55cfac1c96ff4b888a9606fd6a0d8eb65bb446 /freebsd/sys/kern | |
parent | ck: Define CK_MD_PPC32_LWSYNC if available (diff) | |
download | rtems-libbsd-3489e3b6396ee9944a6a2e19e675ca54c36993b4.tar.bz2 |
Update to FreeBSD head 2018-09-17
Git mirror commit 6c2192b1ef8c50788c751f878552526800b1e319.
Update #3472.
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r-- | freebsd/sys/kern/init_main.c | 24 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_event.c | 116 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_intr.c | 193 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_sysctl.c | 102 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_time.c | 4 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_blist.c | 14 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_bus.c | 133 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_counter.c | 15 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_gtaskqueue.c | 18 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_hints.c | 353 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_module.c | 36 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_pcpu.c | 2 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_prf.c | 25 | ||||
-rwxr-xr-x | freebsd/sys/kern/sys_pipe.c | 99 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_sockbuf.c | 79 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 28 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_syscalls.c | 97 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_usrreq.c | 157 |
18 files changed, 952 insertions, 543 deletions
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 42afff5e..fa4951d8 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -122,6 +122,18 @@ int bootverbose = BOOTVERBOSE; SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, "Control the output of verbose kernel messages"); +#ifdef VERBOSE_SYSINIT +/* + * We'll use the defined value of VERBOSE_SYSINIT from the kernel config to + * dictate the default VERBOSE_SYSINIT behavior. Significant values for this + * option and associated tunable are: + * - 0, 'compiled in but silent by default' + * - 1, 'compiled in but verbose by default' (default) + */ +int verbose_sysinit = VERBOSE_SYSINIT; +TUNABLE_INT("debug.verbose_sysinit", &verbose_sysinit); +#endif + #ifdef INVARIANTS FEATURE(invariants, "Kernel compiled with INVARIANTS, may affect performance"); #endif @@ -287,7 +299,7 @@ restart: continue; #if defined(VERBOSE_SYSINIT) - if ((*sipp)->subsystem > last) { + if ((*sipp)->subsystem > last && verbose_sysinit != 0) { verbose = 1; last = (*sipp)->subsystem; printf("subsystem %x\n", last); @@ -526,6 +538,7 @@ proc0_init(void *dummy __unused) p->p_peers = 0; p->p_leader = p; p->p_reaper = p; + p->p_treeflag |= P_TREE_REAPER; LIST_INIT(&p->p_reaplist); strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); @@ -642,17 +655,23 @@ proc0_post(void *dummy __unused) */ sx_slock(&allproc_lock); FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } microuptime(&p->p_stats->p_start); PROC_STATLOCK(p); rufetch(p, &ru); /* Clears thread stats */ - PROC_STATUNLOCK(p); p->p_rux.rux_runtime = 0; p->p_rux.rux_uticks = 0; p->p_rux.rux_sticks = 0; p->p_rux.rux_iticks = 0; + PROC_STATUNLOCK(p); FOREACH_THREAD_IN_PROC(p, td) { td->td_runtime = 0; } + PROC_UNLOCK(p); } sx_sunlock(&allproc_lock); PCPU_SET(switchtime, cpu_ticks()); @@ -857,7 +876,6 @@ create_init(const void *udata __unused) PROC_LOCK(initproc); initproc->p_flag |= P_SYSTEM | P_INMEM; initproc->p_treeflag |= P_TREE_REAPER; - LIST_INSERT_HEAD(&initproc->p_reaplist, &proc0, p_reapsibling); oldcred = initproc->p_ucred; crcopy(newcred, oldcred); #ifdef MAC diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 33fca549..25a9518f 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -179,6 +179,10 @@ static int filt_fileattach(struct knote *kn); static void filt_timerexpire(void *knx); static int filt_timerattach(struct knote *kn); static void filt_timerdetach(struct knote *kn); +static void filt_timerstart(struct knote *kn, sbintime_t to); +static void filt_timertouch(struct knote *kn, struct kevent *kev, + u_long type); +static int filt_timervalidate(struct knote *kn, sbintime_t *to); static int filt_timer(struct knote *kn, long hint); static int filt_userattach(struct knote *kn); static void filt_userdetach(struct knote *kn); @@ -209,6 +213,7 @@ static struct filterops timer_filtops = { .f_attach = filt_timerattach, .f_detach = filt_timerdetach, .f_event = filt_timer, + .f_touch = filt_timertouch, }; static struct filterops user_filtops = { .f_attach = filt_userattach, @@ -738,29 +743,44 @@ filt_timerexpire(void *knx) * data contains amount of time to sleep */ static int -filt_timerattach(struct knote *kn) +filt_timervalidate(struct knote *kn, sbintime_t *to) { - struct kq_timer_cb_data *kc; struct bintime bt; - sbintime_t to, sbt; - unsigned int ncallouts; + sbintime_t sbt; if (kn->kn_sdata < 0) return (EINVAL); if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; - /* Only precision unit are supported in flags so far */ + /* + * The only fflags values supported are the timer unit + * (precision) and the absolute time indicator. + */ if ((kn->kn_sfflags & ~(NOTE_TIMER_PRECMASK | NOTE_ABSTIME)) != 0) return (EINVAL); - to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); + *to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { getboottimebin(&bt); sbt = bttosbt(bt); - to -= sbt; + *to -= sbt; } - if (to < 0) + if (*to < 0) return (EINVAL); + return (0); +} + +static int +filt_timerattach(struct knote *kn) +{ + struct kq_timer_cb_data *kc; + sbintime_t to; + unsigned int ncallouts; + int error; + + error = filt_timervalidate(kn, &to); + if (error != 0) + return (error); do { ncallouts = kq_ncallouts; @@ -773,6 +793,17 @@ filt_timerattach(struct knote *kn) kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); callout_init(&kc->c, 1); + filt_timerstart(kn, to); + + return (0); +} + +static void +filt_timerstart(struct knote *kn, sbintime_t to) +{ + struct kq_timer_cb_data *kc; + + kc = kn->kn_ptr.p_v; if ((kn->kn_sfflags & NOTE_ABSTIME) != 0) { kc->next = to; kc->to = 0; @@ -782,8 +813,6 @@ filt_timerattach(struct knote *kn) } callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); - - return (0); } static void @@ -800,6 +829,73 @@ filt_timerdetach(struct knote *kn) kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } +static void +filt_timertouch(struct knote *kn, struct kevent *kev, u_long type) +{ + struct kq_timer_cb_data *kc; + struct kqueue *kq; + sbintime_t to; + int error; + + switch (type) { + case EVENT_REGISTER: + /* Handle re-added timers that update data/fflags */ + if (kev->flags & EV_ADD) { + kc = kn->kn_ptr.p_v; + + /* Drain any existing callout. */ + callout_drain(&kc->c); + + /* Throw away any existing undelivered record + * of the timer expiration. This is done under + * the presumption that if a process is + * re-adding this timer with new parameters, + * it is no longer interested in what may have + * happened under the old parameters. If it is + * interested, it can wait for the expiration, + * delete the old timer definition, and then + * add the new one. + * + * This has to be done while the kq is locked: + * - if enqueued, dequeue + * - make it no longer active + * - clear the count of expiration events + */ + kq = kn->kn_kq; + KQ_LOCK(kq); + if (kn->kn_status & KN_QUEUED) + knote_dequeue(kn); + + kn->kn_status &= ~KN_ACTIVE; + kn->kn_data = 0; + KQ_UNLOCK(kq); + + /* Reschedule timer based on new data/fflags */ + kn->kn_sfflags = kev->fflags; + kn->kn_sdata = kev->data; + error = filt_timervalidate(kn, &to); + if (error != 0) { + kn->kn_flags |= EV_ERROR; + kn->kn_data = error; + } else + filt_timerstart(kn, to); + } + break; + + case EVENT_PROCESS: + *kev = kn->kn_kevent; + if (kn->kn_flags & EV_CLEAR) { + kn->kn_data = 0; + kn->kn_fflags = 0; + } + break; + + default: + panic("filt_timertouch() - invalid type (%ld)", type); + break; + } +} + static int filt_timer(struct knote *kn, long hint) { diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c index 8f6c2a6d..04914e93 100644 --- a/freebsd/sys/kern/kern_intr.c +++ b/freebsd/sys/kern/kern_intr.c @@ -175,12 +175,13 @@ ithread_update(struct intr_thread *ithd) ie = ithd->it_event; td = ithd->it_thread; + mtx_assert(&ie->ie_lock, MA_OWNED); /* Determine the overall priority of this event. */ - if (TAILQ_EMPTY(&ie->ie_handlers)) + if (CK_SLIST_EMPTY(&ie->ie_handlers)) pri = PRI_MAX_ITHD; else - pri = TAILQ_FIRST(&ie->ie_handlers)->ih_pri; + pri = CK_SLIST_FIRST(&ie->ie_handlers)->ih_pri; /* Update name and priority. */ #ifndef __rtems__ @@ -218,7 +219,7 @@ intr_event_update(struct intr_event *ie) space = 1; /* Run through all the handlers updating values. */ - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { + CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) { if (strlen(ie->ie_fullname) + strlen(ih->ih_name) + 1 < sizeof(ie->ie_fullname)) { strcat(ie->ie_fullname, " "); @@ -280,7 +281,7 @@ intr_event_create(struct intr_event **event, void *source, int flags, int irq, ie->ie_flags = flags; ie->ie_irq = irq; ie->ie_cpu = NOCPU; - TAILQ_INIT(&ie->ie_handlers); + CK_SLIST_INIT(&ie->ie_handlers); mtx_init(&ie->ie_lock, "intr event", NULL, MTX_DEF); va_start(ap, fmt); @@ -402,7 +403,7 @@ intr_lookup(int irq) TAILQ_FOREACH(ie, &event_list, ie_list) if (ie->ie_irq == irq && (ie->ie_flags & IE_SOFT) == 0 && - TAILQ_FIRST(&ie->ie_handlers) != NULL) + CK_SLIST_FIRST(&ie->ie_handlers) != NULL) break; mtx_unlock(&event_lock); return (ie); @@ -498,7 +499,7 @@ intr_event_destroy(struct intr_event *ie) mtx_lock(&event_lock); mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { + if (!CK_SLIST_EMPTY(&ie->ie_handlers)) { mtx_unlock(&ie->ie_lock); mtx_unlock(&event_lock); return (EBUSY); @@ -532,7 +533,7 @@ ithread_create(const char *name) error = kproc_kthread_add(ithread_loop, ithd, &intrproc, &td, RFSTOPPED | RFHIGHPID, - 0, "intr", "%s", name); + 0, "intr", "%s", name); if (error) panic("kproc_create() failed with %d", error); thread_lock(td); @@ -573,6 +574,7 @@ intr_event_add_handler(struct intr_event *ie, const char *name, enum intr_type flags, void **cookiep) { struct intr_handler *ih, *temp_ih; + struct intr_handler **prevptr; struct intr_thread *it; if (ie == NULL || name == NULL || (handler == NULL && filter == NULL)) @@ -595,9 +597,9 @@ intr_event_add_handler(struct intr_event *ie, const char *name, /* We can only have one exclusive handler in a event. */ mtx_lock(&ie->ie_lock); - if (!TAILQ_EMPTY(&ie->ie_handlers)) { + if (!CK_SLIST_EMPTY(&ie->ie_handlers)) { if ((flags & INTR_EXCL) || - (TAILQ_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { + (CK_SLIST_FIRST(&ie->ie_handlers)->ih_flags & IH_EXCLUSIVE)) { mtx_unlock(&ie->ie_lock); free(ih, M_ITHREAD); return (EINVAL); @@ -622,14 +624,12 @@ intr_event_add_handler(struct intr_event *ie, const char *name, } /* Add the new handler to the event in priority order. */ - TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { + CK_SLIST_FOREACH_PREVPTR(temp_ih, prevptr, &ie->ie_handlers, ih_next) { if (temp_ih->ih_pri > ih->ih_pri) break; } - if (temp_ih == NULL) - TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); - else - TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); + CK_SLIST_INSERT_PREVPTR(prevptr, temp_ih, ih, ih_next); + intr_event_update(ie); CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, @@ -656,7 +656,7 @@ intr_event_describe_handler(struct intr_event *ie, void *cookie, mtx_lock(&ie->ie_lock); #ifdef INVARIANTS - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { + CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) { if (ih == cookie) break; } @@ -718,6 +718,45 @@ intr_handler_source(void *cookie) } /* + * If intr_event_handle() is running in the ISR context at the time of the call, + * then wait for it to complete. + */ +static void +intr_event_barrier(struct intr_event *ie) +{ + int phase; + + mtx_assert(&ie->ie_lock, MA_OWNED); + phase = ie->ie_phase; + + /* + * Switch phase to direct future interrupts to the other active counter. + * Make sure that any preceding stores are visible before the switch. + */ + KASSERT(ie->ie_active[!phase] == 0, ("idle phase has activity")); + atomic_store_rel_int(&ie->ie_phase, !phase); + + /* + * This code cooperates with wait-free iteration of ie_handlers + * in intr_event_handle. + * Make sure that the removal and the phase update are not reordered + * with the active count check. + * Note that no combination of acquire and release fences can provide + * that guarantee as Store->Load sequences can always be reordered. + */ + atomic_thread_fence_seq_cst(); + + /* + * Now wait on the inactive phase. + * The acquire fence is needed so that that all post-barrier accesses + * are after the check. + */ + while (ie->ie_active[phase] > 0) + cpu_spinwait(); + atomic_thread_fence_acq(); +} + +/* * Sleep until an ithread finishes executing an interrupt handler. * * XXX Doesn't currently handle interrupt filters or fast interrupt @@ -757,16 +796,14 @@ _intr_drain(int irq) } #endif /* __rtems__ */ - #ifndef __rtems__ int intr_event_remove_handler(void *cookie) { struct intr_handler *handler = (struct intr_handler *)cookie; struct intr_event *ie; -#ifdef INVARIANTS struct intr_handler *ih; -#endif + struct intr_handler **prevptr; #ifdef notyet int dead; #endif @@ -777,60 +814,48 @@ intr_event_remove_handler(void *cookie) KASSERT(ie != NULL, ("interrupt handler \"%s\" has a NULL interrupt event", handler->ih_name)); + mtx_lock(&ie->ie_lock); CTR3(KTR_INTR, "%s: removing %s from %s", __func__, handler->ih_name, ie->ie_name); -#ifdef INVARIANTS - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) + CK_SLIST_FOREACH_PREVPTR(ih, prevptr, &ie->ie_handlers, ih_next) { if (ih == handler) - goto ok; - mtx_unlock(&ie->ie_lock); - panic("interrupt handler \"%s\" not found in interrupt event \"%s\"", - ih->ih_name, ie->ie_name); -ok: -#endif + break; + } + if (ih == NULL) { + panic("interrupt handler \"%s\" not found in " + "interrupt event \"%s\"", handler->ih_name, ie->ie_name); + } + /* - * If there is no ithread, then just remove the handler and return. - * XXX: Note that an INTR_FAST handler might be running on another - * CPU! + * If there is no ithread, then directly remove the handler. Note that + * intr_event_handle() iterates ie_handlers in a lock-less fashion, so + * care needs to be taken to keep ie_handlers consistent and to free + * the removed handler only when ie_handlers is quiescent. */ if (ie->ie_thread == NULL) { - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); + CK_SLIST_REMOVE_PREVPTR(prevptr, ih, ih_next); + intr_event_barrier(ie); + intr_event_update(ie); mtx_unlock(&ie->ie_lock); free(handler, M_ITHREAD); return (0); } /* - * If the interrupt thread is already running, then just mark this - * handler as being dead and let the ithread do the actual removal. - * - * During a cold boot while cold is set, msleep() does not sleep, - * so we have to remove the handler here rather than letting the - * thread do it. + * Let the interrupt thread do the job. + * The interrupt source is disabled when the interrupt thread is + * running, so it does not have to worry about interaction with + * intr_event_handle(). */ - thread_lock(ie->ie_thread->it_thread); - if (!TD_AWAITING_INTR(ie->ie_thread->it_thread) && !cold) { - handler->ih_flags |= IH_DEAD; - - /* - * Ensure that the thread will process the handler list - * again and remove this handler if it has already passed - * it on the list. - * - * The release part of the following store ensures - * that the update of ih_flags is ordered before the - * it_need setting. See the comment before - * atomic_cmpset_acq(&ithd->it_need, ...) operation in - * the ithread_execute_handlers(). - */ - atomic_store_rel_int(&ie->ie_thread->it_need, 1); - } else - TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); - thread_unlock(ie->ie_thread->it_thread); + KASSERT((handler->ih_flags & IH_DEAD) == 0, + ("duplicate handle remove")); + handler->ih_flags |= IH_DEAD; + intr_event_schedule_thread(ie); while (handler->ih_flags & IH_DEAD) msleep(handler, &ie->ie_lock, 0, "iev_rmh", 0); intr_event_update(ie); + #ifdef notyet /* * XXX: This could be bad in the case of ppbus(8). Also, I think @@ -838,8 +863,8 @@ ok: * interrupt. */ dead = 1; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { - if (!(ih->ih_flags & IH_FAST)) { + CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) { + if (ih->ih_handler != NULL) { dead = 0; break; } @@ -866,7 +891,7 @@ intr_event_schedule_thread(struct intr_event *ie) /* * If no ithread or no handlers, then we have a stray interrupt. */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers) || + if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers) || ie->ie_thread == NULL) return (EINVAL); @@ -881,7 +906,7 @@ intr_event_schedule_thread(struct intr_event *ie) if (ie->ie_flags & IE_ENTROPY) { entropy.event = (uintptr_t)ie; entropy.td = ctd; - random_harvest_queue(&entropy, sizeof(entropy), 2, RANDOM_INTERRUPT); + random_harvest_queue(&entropy, sizeof(entropy), RANDOM_INTERRUPT); } #ifndef __rtems__ @@ -981,7 +1006,7 @@ swi_sched(void *cookie, int flags) entropy.event = (uintptr_t)ih; entropy.td = curthread; - random_harvest_queue(&entropy, sizeof(entropy), 1, RANDOM_SWI); + random_harvest_queue(&entropy, sizeof(entropy), RANDOM_SWI); /* * Set ih_need for this handler so that if the ithread is already @@ -1012,32 +1037,37 @@ swi_remove(void *cookie) return (intr_event_remove_handler(cookie)); } - - #endif /* __rtems__ */ -/* - * This is a public function for use by drivers that mux interrupt - * handlers for child devices from their interrupt handler. - */ -void + +static void intr_event_execute_handlers(struct proc *p, struct intr_event *ie) { - struct intr_handler *ih, *ihn; + struct intr_handler *ih, *ihn, *ihp; - TAILQ_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) { + ihp = NULL; + CK_SLIST_FOREACH_SAFE(ih, &ie->ie_handlers, ih_next, ihn) { /* * If this handler is marked for death, remove it from * the list of handlers and wake up the sleeper. */ if (ih->ih_flags & IH_DEAD) { mtx_lock(&ie->ie_lock); - TAILQ_REMOVE(&ie->ie_handlers, ih, ih_next); + if (ihp == NULL) + CK_SLIST_REMOVE_HEAD(&ie->ie_handlers, ih_next); + else + CK_SLIST_REMOVE_AFTER(ihp, ih_next); ih->ih_flags &= ~IH_DEAD; wakeup(ih); mtx_unlock(&ie->ie_lock); continue; } + /* + * Now that we know that the current element won't be removed + * update the previous element. + */ + ihp = ih; + /* Skip filter only handlers */ if (ih->ih_handler == NULL) continue; @@ -1226,6 +1256,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) struct trapframe *oldframe; struct thread *td; int ret, thread; + int phase; td = curthread; @@ -1234,7 +1265,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) #endif /* An interrupt with no event or handlers is a stray interrupt. */ - if (ie == NULL || TAILQ_EMPTY(&ie->ie_handlers)) + if (ie == NULL || CK_SLIST_EMPTY(&ie->ie_handlers)) return (EINVAL); /* @@ -1249,7 +1280,17 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) critical_enter(); oldframe = td->td_intr_frame; td->td_intr_frame = frame; - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { + + phase = ie->ie_phase; + atomic_add_int(&ie->ie_active[phase], 1); + + /* + * This fence is required to ensure that no later loads are + * re-ordered before the ie_active store. + */ + atomic_thread_fence_seq_cst(); + + CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) { if (ih->ih_filter == NULL) { thread = 1; continue; @@ -1286,6 +1327,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) thread = 1; } } + atomic_add_rel_int(&ie->ie_active[phase], -1); + td->td_intr_frame = oldframe; if (thread) { @@ -1295,7 +1338,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) if (ie->ie_post_filter != NULL) ie->ie_post_filter(ie->ie_source); } - + /* Schedule the ithread if needed. */ if (thread) { int error __unused; @@ -1441,7 +1484,7 @@ db_dump_intr_event(struct intr_event *ie, int handlers) db_printf("\n"); if (handlers) - TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) + CK_SLIST_FOREACH(ih, &ie->ie_handlers, ih_next) db_dump_intrhand(ih); } @@ -1456,7 +1499,7 @@ DB_SHOW_COMMAND(intr, db_show_intr) verbose = strchr(modif, 'v') != NULL; all = strchr(modif, 'a') != NULL; TAILQ_FOREACH(ie, &event_list, ie_list) { - if (!all && TAILQ_EMPTY(&ie->ie_handlers)) + if (!all && CK_SLIST_EMPTY(&ie->ie_handlers)) continue; db_dump_intr_event(ie, verbose); if (db_pager_quit) diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index b4e9711f..3baea2e4 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -195,13 +195,8 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) char path[96]; ssize_t rem = sizeof(path); ssize_t len; - uint8_t val_8; - uint16_t val_16; - uint32_t val_32; - int val_int; - long val_long; - int64_t val_64; - quad_t val_quad; + uint8_t data[512] __aligned(sizeof(uint64_t)); + int size; int error; path[--rem] = 0; @@ -229,85 +224,88 @@ sysctl_load_tunable_by_oid_locked(struct sysctl_oid *oidp) switch (oidp->oid_kind & CTLTYPE) { case CTLTYPE_INT: - if (getenv_int(path + rem, &val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int), GETENV_SIGNED) == 0) return; - req.newlen = sizeof(val_int); - req.newptr = &val_int; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_UINT: - if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int), GETENV_UNSIGNED) == 0) return; - req.newlen = sizeof(val_int); - req.newptr = &val_int; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_LONG: - if (getenv_long(path + rem, &val_long) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(long), GETENV_SIGNED) == 0) return; - req.newlen = sizeof(val_long); - req.newptr = &val_long; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_ULONG: - if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(long), GETENV_UNSIGNED) == 0) return; - req.newlen = sizeof(val_long); - req.newptr = &val_long; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_S8: - if (getenv_int(path + rem, &val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int8_t), GETENV_SIGNED) == 0) return; - val_8 = val_int; - req.newlen = sizeof(val_8); - req.newptr = &val_8; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_S16: - if (getenv_int(path + rem, &val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int16_t), GETENV_SIGNED) == 0) return; - val_16 = val_int; - req.newlen = sizeof(val_16); - req.newptr = &val_16; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_S32: - if (getenv_long(path + rem, &val_long) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int32_t), GETENV_SIGNED) == 0) return; - val_32 = val_long; - req.newlen = sizeof(val_32); - req.newptr = &val_32; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_S64: - if (getenv_quad(path + rem, &val_quad) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(int64_t), GETENV_SIGNED) == 0) return; - val_64 = val_quad; - req.newlen = sizeof(val_64); - req.newptr = &val_64; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_U8: - if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(uint8_t), GETENV_UNSIGNED) == 0) return; - val_8 = val_int; - req.newlen = sizeof(val_8); - req.newptr = &val_8; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_U16: - if (getenv_uint(path + rem, (unsigned int *)&val_int) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(uint16_t), GETENV_UNSIGNED) == 0) return; - val_16 = val_int; - req.newlen = sizeof(val_16); - req.newptr = &val_16; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_U32: - if (getenv_ulong(path + rem, (unsigned long *)&val_long) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(uint32_t), GETENV_UNSIGNED) == 0) return; - val_32 = val_long; - req.newlen = sizeof(val_32); - req.newptr = &val_32; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_U64: - /* XXX there is no getenv_uquad() */ - if (getenv_quad(path + rem, &val_quad) == 0) + if (getenv_array(path + rem, data, sizeof(data), &size, + sizeof(uint64_t), GETENV_UNSIGNED) == 0) return; - val_64 = val_quad; - req.newlen = sizeof(val_64); - req.newptr = &val_64; + req.newlen = size; + req.newptr = data; break; case CTLTYPE_STRING: penv = kern_getenv(path + rem); diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c index a9c0547a..74b144cb 100644 --- a/freebsd/sys/kern/kern_time.c +++ b/freebsd/sys/kern/kern_time.c @@ -288,6 +288,8 @@ get_process_cputime(struct proc *targetp, struct timespec *ats) PROC_STATLOCK(targetp); rufetch(targetp, &ru); runtime = targetp->p_rux.rux_runtime; + if (curthread->td_proc == targetp) + runtime += cpu_ticks() - PCPU_GET(switchtime); PROC_STATUNLOCK(targetp); cputick2timespec(runtime, ats); } @@ -1577,7 +1579,7 @@ realtimer_settime(struct itimer *it, int flags, if ((flags & TIMER_ABSTIME) == 0) { /* Convert to absolute time. */ timespecadd(&it->it_time.it_value, &cts, - &it->it_time.it_value); + &it->it_time.it_value); } else { timespecsub(&ts, &cts, &ts); /* diff --git a/freebsd/sys/kern/subr_blist.c b/freebsd/sys/kern/subr_blist.c index a7d78d86..e5b40e62 100644 --- a/freebsd/sys/kern/subr_blist.c +++ b/freebsd/sys/kern/subr_blist.c @@ -226,17 +226,19 @@ blist_create(daddr_t blocks, int flags) u_daddr_t nodes, radix, skip; int digit; + if (blocks == 0) + panic("invalid block count"); + /* - * Calculate the radix and node count used for scanning. Find the last - * block that is followed by a terminator. + * Calculate the radix and node count used for scanning. */ last_block = blocks - 1; radix = BLIST_BMAP_RADIX; while (radix < blocks) { if (((last_block / radix + 1) & BLIST_META_MASK) != 0) /* - * A terminator will be added. Update last_block to the - * position just before that terminator. + * We must widen the blist to avoid partially + * filled nodes. */ last_block |= radix - 1; radix *= BLIST_META_RADIX; @@ -246,7 +248,9 @@ blist_create(daddr_t blocks, int flags) * Count the meta-nodes in the expanded tree, including the final * terminator, from the bottom level up to the root. */ - nodes = (last_block >= blocks) ? 2 : 1; + nodes = 1; + if (radix - blocks >= BLIST_BMAP_RADIX) + nodes++; last_block /= BLIST_BMAP_RADIX; while (last_block > 0) { nodes += last_block + 1; diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index 0626ec0a..391b2ed6 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -53,6 +53,7 @@ __FBSDID("$FreeBSD$"); #include <machine/bus.h> #include <sys/random.h> #include <sys/rman.h> +#include <sys/sbuf.h> #include <sys/selinfo.h> #include <sys/signalvar.h> #include <sys/smp.h> @@ -60,7 +61,6 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/uio.h> #include <sys/bus.h> -#include <sys/interrupt.h> #include <sys/cpuset.h> #include <net/vnet.h> @@ -84,6 +84,8 @@ struct driverlink { kobj_class_t driver; TAILQ_ENTRY(driverlink) link; /* list of drivers in devclass */ int pass; + int flags; +#define DL_DEFERRED_PROBE 1 /* Probe deferred on this */ TAILQ_ENTRY(driverlink) passlink; }; @@ -155,6 +157,9 @@ EVENTHANDLER_LIST_DEFINE(device_detach); EVENTHANDLER_LIST_DEFINE(dev_lookup); static void devctl2_init(void); +static bool device_frozen; +#else /* __rtems__ */ +#define device_frozen false #endif /* __rtems__ */ #define DRIVERNAME(d) ((d)? d->name : "no driver") @@ -885,27 +890,18 @@ sysctl_devctl_queue(SYSCTL_HANDLER_ARGS) * Strings are always terminated with a NUL, but may be truncated if longer * than @p len bytes after quotes. * - * @param dst Buffer to hold the string. Must be at least @p len bytes long + * @param sb sbuf to place the characters into * @param src Original buffer. - * @param len Length of buffer pointed to by @dst, including trailing NUL */ void -devctl_safe_quote(char *dst, const char *src, size_t len) +devctl_safe_quote_sb(struct sbuf *sb, const char *src) { - char *walker = dst, *ep = dst + len - 1; - if (len == 0) - return; - while (src != NULL && walker < ep) - { - if (*src == '"' || *src == '\\') { - if (ep - walker < 2) - break; - *walker++ = '\\'; - } - *walker++ = *src++; + while (*src != '\0') { + if (*src == '"' || *src == '\\') + sbuf_putc(sb, '\\'); + sbuf_putc(sb, *src++); } - *walker = '\0'; } /* End of /dev/devctl code */ @@ -1204,7 +1200,11 @@ devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp) dl->pass = pass; driver_register_pass(dl); - devclass_driver_added(dc, driver); + if (device_frozen) { + dl->flags |= DL_DEFERRED_PROBE; + } else { + devclass_driver_added(dc, driver); + } bus_data_generation_update(); return (0); } @@ -1244,6 +1244,9 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver) * Note that since a driver can be in multiple devclasses, we * should not detach devices which are not children of devices in * the affected devclass. + * + * If we're frozen, we don't generate NOMATCH events. Mark to + * generate later. */ for (i = 0; i < dc->maxunit; i++) { if (dc->devices[i]) { @@ -1252,9 +1255,14 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver) dev->parent->devclass == busclass) { if ((error = device_detach(dev)) != 0) return (error); - BUS_PROBE_NOMATCH(dev->parent, dev); - devnomatch(dev); - dev->flags |= DF_DONENOMATCH; + if (device_frozen) { + dev->flags &= ~DF_DONENOMATCH; + dev->flags |= DF_NEEDNOMATCH; + } else { + BUS_PROBE_NOMATCH(dev->parent, dev); + devnomatch(dev); + dev->flags |= DF_DONENOMATCH; + } } } } @@ -2958,6 +2966,7 @@ int device_attach(device_t dev) { uint64_t attachtime; + uint16_t attachentropy; int error; #ifndef __rtems__ @@ -2985,19 +2994,12 @@ device_attach(device_t dev) dev->state = DS_NOTPRESENT; return (error); } - attachtime = get_cyclecount() - attachtime; - /* - * 4 bits per device is a reasonable value for desktop and server - * hardware with good get_cyclecount() implementations, but WILL - * need to be adjusted on other platforms. + dev->flags |= DF_ATTACHED_ONCE; + /* We only need the low bits of this time, but ranges from tens to thousands + * have been seen, so keep 2 bytes' worth. */ -#define RANDOM_PROBE_BIT_GUESS 4 - if (bootverbose) - printf("random: harvesting attach, %zu bytes (%d bits) from %s%d\n", - sizeof(attachtime), RANDOM_PROBE_BIT_GUESS, - dev->driver->name, dev->unit); - random_harvest_direct(&attachtime, sizeof(attachtime), - RANDOM_PROBE_BIT_GUESS, RANDOM_ATTACH); + attachentropy = (uint16_t)(get_cyclecount() - attachtime); + random_harvest_direct(&attachentropy, sizeof(attachentropy), RANDOM_ATTACH); device_sysctl_update(dev); if (dev->busy) dev->state = DS_BUSY; @@ -5474,6 +5476,53 @@ driver_exists(device_t bus, const char *driver) return (false); } +static void +device_gen_nomatch(device_t dev) +{ + device_t child; + + if (dev->flags & DF_NEEDNOMATCH && + dev->state == DS_NOTPRESENT) { + BUS_PROBE_NOMATCH(dev->parent, dev); + devnomatch(dev); + dev->flags |= DF_DONENOMATCH; + } + dev->flags &= ~DF_NEEDNOMATCH; + TAILQ_FOREACH(child, &dev->children, link) { + device_gen_nomatch(child); + } +} + +static void +device_do_deferred_actions(void) +{ + devclass_t dc; + driverlink_t dl; + + /* + * Walk through the devclasses to find all the drivers we've tagged as + * deferred during the freeze and call the driver added routines. They + * have already been added to the lists in the background, so the driver + * added routines that trigger a probe will have all the right bidders + * for the probe auction. + */ + TAILQ_FOREACH(dc, &devclasses, link) { + TAILQ_FOREACH(dl, &dc->drivers, link) { + if (dl->flags & DL_DEFERRED_PROBE) { + devclass_driver_added(dc, dl->driver); + dl->flags &= ~DL_DEFERRED_PROBE; + } + } + } + + /* + * We also defer no-match events during a freeze. Walk the tree and + * generate all the pent-up events that are still relevant. + */ + device_gen_nomatch(root_bus); + bus_data_generation_update(); +} + static int devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, struct thread *td) @@ -5500,6 +5549,10 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, if (error == 0) error = find_device(req, &dev); break; + case DEV_FREEZE: + case DEV_THAW: + error = priv_check(td, PRIV_DRIVER); + break; default: error = ENOTTY; break; @@ -5703,7 +5756,23 @@ devctl2_ioctl(struct cdev *cdev, u_long cmd, caddr_t data, int fflag, error = device_delete_child(parent, dev); break; } +#ifndef __rtems__ + case DEV_FREEZE: + if (device_frozen) + error = EBUSY; + else + device_frozen = true; + break; + case DEV_THAW: + if (!device_frozen) + error = EBUSY; + else { + device_do_deferred_actions(); + device_frozen = false; + } + break; } +#endif /* __rtems__ */ mtx_unlock(&Giant); return (error); } diff --git a/freebsd/sys/kern/subr_counter.c b/freebsd/sys/kern/subr_counter.c index e4c98fae..66cda02b 100644 --- a/freebsd/sys/kern/subr_counter.c +++ b/freebsd/sys/kern/subr_counter.c @@ -44,7 +44,7 @@ __FBSDID("$FreeBSD$"); #define IN_SUBR_COUNTER_C #include <sys/counter.h> - + void counter_u64_zero(counter_u64_t c) { @@ -62,20 +62,15 @@ counter_u64_fetch(counter_u64_t c) counter_u64_t counter_u64_alloc(int flags) { - counter_u64_t r; - - r = uma_zalloc(pcpu_zone_64, flags); - if (r != NULL) - counter_u64_zero(r); - return (r); + return (uma_zalloc_pcpu(pcpu_zone_64, flags | M_ZERO)); } void counter_u64_free(counter_u64_t c) { - uma_zfree(pcpu_zone_64, c); + uma_zfree_pcpu(pcpu_zone_64, c); } int @@ -142,7 +137,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit) val = cr->cr_over; now = ticks; - if (now - cr->cr_ticks >= hz) { + if ((u_int)(now - cr->cr_ticks) >= hz) { /* * Time to clear the structure, we are in the next second. * First try unlocked read, and then proceed with atomic. @@ -153,7 +148,7 @@ counter_ratecheck(struct counter_rate *cr, int64_t limit) * Check if other thread has just went through the * reset sequence before us. */ - if (now - cr->cr_ticks >= hz) { + if ((u_int)(now - cr->cr_ticks) >= hz) { val = counter_u64_fetch(cr->cr_rate); counter_u64_zero(cr->cr_rate); cr->cr_over = 0; diff --git a/freebsd/sys/kern/subr_gtaskqueue.c b/freebsd/sys/kern/subr_gtaskqueue.c index aa5c922d..e56e90d7 100644 --- a/freebsd/sys/kern/subr_gtaskqueue.c +++ b/freebsd/sys/kern/subr_gtaskqueue.c @@ -921,6 +921,24 @@ taskqgroup_bind(struct taskqgroup *qgroup) } } +static void +taskqgroup_config_init(void *arg) +{ + struct taskqgroup *qgroup = qgroup_config; + LIST_HEAD(, grouptask) gtask_head = LIST_HEAD_INITIALIZER(NULL); + + LIST_SWAP(>ask_head, &qgroup->tqg_queue[0].tgc_tasks, + grouptask, gt_list); + qgroup->tqg_queue[0].tgc_cnt = 0; + taskqgroup_cpu_create(qgroup, 0, 0); + + qgroup->tqg_cnt = 1; + qgroup->tqg_stride = 1; +} + +SYSINIT(taskqgroup_config_init, SI_SUB_TASKQ, SI_ORDER_SECOND, + taskqgroup_config_init, NULL); + static int _taskqgroup_adjust(struct taskqgroup *qgroup, int cnt, int stride) { diff --git a/freebsd/sys/kern/subr_hints.c b/freebsd/sys/kern/subr_hints.c index 982059c3..78ab1b4e 100644 --- a/freebsd/sys/kern/subr_hints.c +++ b/freebsd/sys/kern/subr_hints.c @@ -33,228 +33,257 @@ __FBSDID("$FreeBSD$"); #include <sys/param.h> #include <sys/lock.h> +#include <sys/kernel.h> #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/sysctl.h> #include <sys/systm.h> #include <sys/bus.h> -/* - * Access functions for device resources. - */ - #ifndef __rtems__ -static int checkmethod = 1; -static int use_kenv; -static char *hintp; -#else /* __rtems__ */ -#define hintmode 1 -#define hintp static_hints -#define use_kenv 0 -static char __used default_static_hints[] = ""; -__weak_reference(default_static_hints, static_hints); -#endif /* __rtems__ */ +#define FBACK_MDENV 0 /* MD env (e.g. loader.conf) */ +#define FBACK_STENV 1 /* Static env */ +#define FBACK_STATIC 2 /* static_hints */ -#ifndef __rtems__ /* - * Define kern.hintmode sysctl, which only accept value 2, that cause to - * switch from Static KENV mode to Dynamic KENV. So systems that have hints - * compiled into kernel will be able to see/modify KENV (and hints too). + * We'll use hintenv_merged to indicate that the dynamic environment has been + * properly prepared for hint usage. This implies that the dynamic environment + * has already been setup (dynamic_kenv) and that we have added any supplied + * static_hints to the dynamic environment. + */ +static bool hintenv_merged; +/* Static environment and static hints cannot change, so we'll skip known bad */ +static bool stenv_skip; +static bool sthints_skip; +/* + * Access functions for device resources. */ -static int -sysctl_hintmode(SYSCTL_HANDLER_ARGS) +static void +static_hints_to_env(void *data __unused) { const char *cp; char *line, *eq; - int eqidx, error, from_kenv, i, value; - - from_kenv = 0; - cp = kern_envp; - value = hintmode; - - /* Fetch candidate for new hintmode value */ - error = sysctl_handle_int(oidp, &value, 0, req); - if (error || req->newptr == NULL) - return (error); - - if (value != 2) - /* Only accept swithing to hintmode 2 */ - return (EINVAL); - - /* Migrate from static to dynamic hints */ - switch (hintmode) { - case 0: - if (dynamic_kenv) { - /* - * Already here. But assign hintmode to 2, to not - * check it in the future. - */ - hintmode = 2; - return (0); - } - from_kenv = 1; - cp = kern_envp; - break; - case 1: - cp = static_hints; - break; - case 2: - /* Nothing to do, hintmode already 2 */ - return (0); - } + int eqidx, i; - while (cp) { - i = strlen(cp); - if (i == 0) - break; - if (from_kenv) { - if (strncmp(cp, "hint.", 5) != 0) - /* kenv can have not only hints */ - continue; - } + cp = static_hints; + while (cp && *cp != '\0') { eq = strchr(cp, '='); if (eq == NULL) /* Bad hint value */ continue; eqidx = eq - cp; - line = malloc(i+1, M_TEMP, M_WAITOK); + i = strlen(cp); + line = malloc(i + 1, M_TEMP, M_WAITOK); strcpy(line, cp); - line[eqidx] = '\0'; - kern_setenv(line, line + eqidx + 1); + line[eqidx] = line[i] = '\0'; + /* + * Before adding a hint to the dynamic environment, check if + * another value for said hint has already been added. This is + * needed because static environment overrides static hints and + * dynamic environment overrides all. + */ + if (testenv(line) == 0) + kern_setenv(line, line + eqidx + 1); free(line, M_TEMP); cp += i + 1; } - - hintmode = value; - use_kenv = 1; - return (0); + hintenv_merged = true; } -SYSCTL_PROC(_kern, OID_AUTO, hintmode, CTLTYPE_INT|CTLFLAG_RW, - &hintmode, 0, sysctl_hintmode, "I", "Get/set current hintmode"); +/* Any time after dynamic env is setup */ +SYSINIT(hintenv, SI_SUB_KMEM + 1, SI_ORDER_SECOND, static_hints_to_env, NULL); +#else /* __rtems__ */ +#define sthints_skip false + +static char __used default_static_hints[] = ""; +__weak_reference(default_static_hints, static_hints); #endif /* __rtems__ */ /* + * Checks the environment to see if we even have any hints. If it has no hints, + * then res_find can take the hint that there's no point in searching it and + * either move on to the next environment or fail early. + */ +static bool +_res_checkenv(char *envp) +{ + char *cp; + + cp = envp; + while (cp) { + if (strncmp(cp, "hint.", 5) == 0) + return (true); + while (*cp != '\0') + cp++; + cp++; + if (*cp == '\0') + break; + } + return (false); +} + +/* * Evil wildcarding resource string lookup. * This walks the supplied env string table and returns a match. * The start point can be remembered for incremental searches. */ static int -res_find(int *line, int *startln, +res_find(char **hintp_cookie, int *line, int *startln, const char *name, int *unit, const char *resname, const char *value, const char **ret_name, int *ret_namelen, int *ret_unit, const char **ret_resname, int *ret_resnamelen, const char **ret_value) { - int n = 0, hit, i = 0; +#ifndef __rtems__ + int fbacklvl = FBACK_MDENV, i = 0, n = 0; +#else /* __rtems__ */ + int n = 0; +#endif /* __rtems__ */ char r_name[32]; int r_unit; char r_resname[32]; char r_value[128]; const char *s, *cp; - char *p; - + char *hintp, *p; #ifndef __rtems__ - if (checkmethod) { - hintp = NULL; + bool dyn_used = false; - switch (hintmode) { - case 0: /* loader hints in environment only */ - break; - case 1: /* static hints only */ - hintp = static_hints; - checkmethod = 0; - break; - case 2: /* fallback mode */ - if (dynamic_kenv) { - mtx_lock(&kenv_lock); - cp = kenvp[0]; - for (i = 0; cp != NULL; cp = kenvp[++i]) { - if (!strncmp(cp, "hint.", 5)) { - use_kenv = 1; - checkmethod = 0; - break; - } + + /* + * We are expecting that the caller will pass us a hintp_cookie that + * they are tracking. Upon entry, if *hintp_cookie is *not* set, this + * indicates to us that we should be figuring out based on the current + * environment where to search. This keeps us sane throughout the + * entirety of a single search. + */ + if (*hintp_cookie == NULL) { + hintp = NULL; + if (hintenv_merged) { + /* + * static_hints, if it was previously used, has + * already been folded in to the environment + * by this point. + */ + mtx_lock(&kenv_lock); + cp = kenvp[0]; + for (i = 0; cp != NULL; cp = kenvp[++i]) { + if (!strncmp(cp, "hint.", 5)) { + hintp = kenvp[0]; + break; } - mtx_unlock(&kenv_lock); - } else { - cp = kern_envp; - while (cp) { - if (strncmp(cp, "hint.", 5) == 0) { - cp = NULL; - hintp = kern_envp; - break; - } - while (*cp != '\0') - cp++; - cp++; - if (*cp == '\0') { - cp = NULL; - hintp = static_hints; - break; - } + } + mtx_unlock(&kenv_lock); + dyn_used = true; + } else { + /* + * We'll have a chance to keep coming back here until + * we've actually exhausted all of our possibilities. + * We might have chosen the MD/Static env because it + * had some kind of hints, but perhaps it didn't have + * the hint we are looking for. We don't provide any + * fallback when searching the dynamic environment. + */ +fallback: + if (dyn_used || fbacklvl >= FBACK_STATIC) + return (ENOENT); + + switch (fbacklvl) { + case FBACK_MDENV: + fbacklvl++; + if (_res_checkenv(md_envp)) { + hintp = md_envp; + break; } + + /* FALLTHROUGH */ + case FBACK_STENV: + fbacklvl++; + if (!stenv_skip && _res_checkenv(kern_envp)) { + hintp = kern_envp; + break; + } else + stenv_skip = true; + + /* FALLTHROUGH */ + case FBACK_STATIC: + fbacklvl++; +#else /* __rtems__ */ + hintp = NULL; +#endif /* __rtems__ */ + /* We'll fallback to static_hints if needed/can */ + if (!sthints_skip && + _res_checkenv(static_hints)) + hintp = static_hints; +#ifndef __rtems__ + else + sthints_skip = true; + + break; + default: + return (ENOENT); } - break; - default: - break; - } - if (hintp == NULL) { - if (dynamic_kenv) { - use_kenv = 1; - checkmethod = 0; - } else - hintp = kern_envp; } +#endif /* __rtems__ */ + + if (hintp == NULL) + return (ENOENT); + *hintp_cookie = hintp; +#ifndef __rtems__ + } else { + hintp = *hintp_cookie; + if (hintenv_merged && hintp == kenvp[0]) + dyn_used = true; + else + /* + * If we aren't using the dynamic environment, we need + * to run through the proper fallback procedure again. + * This is so that we do continuations right if we're + * working with *line and *startln. + */ + goto fallback; } - if (use_kenv) { + if (dyn_used) { mtx_lock(&kenv_lock); i = 0; - cp = kenvp[0]; - if (cp == NULL) { - mtx_unlock(&kenv_lock); - return (ENOENT); - } - } else + } #endif /* __rtems__ */ - cp = hintp; + + cp = hintp; while (cp) { - hit = 1; (*line)++; if (strncmp(cp, "hint.", 5) != 0) - hit = 0; - else - n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s", - r_name, &r_unit, r_resname, r_value); - if (hit && n != 4) { + goto nexthint; + n = sscanf(cp, "hint.%32[^.].%d.%32[^=]=%127s", r_name, &r_unit, + r_resname, r_value); + if (n != 4) { printf("CONFIG: invalid hint '%s'\n", cp); p = strchr(cp, 'h'); *p = 'H'; - hit = 0; + goto nexthint; } - if (hit && startln && *startln >= 0 && *line < *startln) - hit = 0; - if (hit && name && strcmp(name, r_name) != 0) - hit = 0; - if (hit && unit && *unit != r_unit) - hit = 0; - if (hit && resname && strcmp(resname, r_resname) != 0) - hit = 0; - if (hit && value && strcmp(value, r_value) != 0) - hit = 0; - if (hit) - break; - if (use_kenv) { + if (startln && *startln >= 0 && *line < *startln) + goto nexthint; + if (name && strcmp(name, r_name) != 0) + goto nexthint; + if (unit && *unit != r_unit) + goto nexthint; + if (resname && strcmp(resname, r_resname) != 0) + goto nexthint; + if (value && strcmp(value, r_value) != 0) + goto nexthint; + /* Successfully found a hint matching all criteria */ + break; +nexthint: #ifndef __rtems__ + if (dyn_used) { cp = kenvp[++i]; if (cp == NULL) break; -#else /* __rtems__ */ (void) i; -#endif /* __rtems__ */ } else { +#endif /* __rtems__ */ while (*cp != '\0') cp++; cp++; @@ -262,14 +291,20 @@ res_find(int *line, int *startln, cp = NULL; break; } +#ifndef __rtems__ } +#endif /* __rtems__ */ } #ifndef __rtems__ - if (use_kenv) + if (dyn_used) mtx_unlock(&kenv_lock); #endif /* __rtems__ */ if (cp == NULL) - return ENOENT; +#ifndef __rtems__ + goto fallback; +#else /* __rtems__ */ + return (ENOENT); +#endif /* __rtems__ */ s = cp; /* This is a bit of a hack, but at least is reentrant */ @@ -307,11 +342,13 @@ resource_find(int *line, int *startln, { int i; int un; + char *hintp; *line = 0; + hintp = NULL; /* Search for exact unit matches first */ - i = res_find(line, startln, name, unit, resname, value, + i = res_find(&hintp, line, startln, name, unit, resname, value, ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen, ret_value); if (i == 0) @@ -320,7 +357,7 @@ resource_find(int *line, int *startln, return ENOENT; /* If we are still here, search for wildcard matches */ un = -1; - i = res_find(line, startln, name, &un, resname, value, + i = res_find(&hintp, line, startln, name, &un, resname, value, ret_name, ret_namelen, ret_unit, ret_resname, ret_resnamelen, ret_value); if (i == 0) diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c index d8d42653..21b2754c 100644 --- a/freebsd/sys/kern/subr_module.c +++ b/freebsd/sys/kern/subr_module.c @@ -35,6 +35,9 @@ __FBSDID("$FreeBSD$"); #include <sys/systm.h> #include <sys/linker.h> +#include <vm/vm.h> +#include <vm/vm_extern.h> + /* * Preloaded module support */ @@ -206,29 +209,42 @@ preload_search_info(caddr_t mod, int inf) void preload_delete_name(const char *name) { - caddr_t curp; - uint32_t *hdr; + caddr_t addr, curp; + uint32_t *hdr, sz; int next; int clearing; + + addr = 0; + sz = 0; if (preload_metadata != NULL) { - + clearing = 0; curp = preload_metadata; for (;;) { hdr = (uint32_t *)curp; - if (hdr[0] == 0 && hdr[1] == 0) - break; - - /* Search for a MODINFO_NAME field */ - if (hdr[0] == MODINFO_NAME) { + if (hdr[0] == MODINFO_NAME || (hdr[0] == 0 && hdr[1] == 0)) { + /* Free memory used to store the file. */ + if (addr != 0 && sz != 0) + kmem_bootstrap_free((vm_offset_t)addr, sz); + addr = 0; + sz = 0; + + if (hdr[0] == 0) + break; if (!strcmp(name, curp + sizeof(uint32_t) * 2)) clearing = 1; /* got it, start clearing */ - else if (clearing) + else if (clearing) { clearing = 0; /* at next one now.. better stop */ + } } - if (clearing) + if (clearing) { + if (hdr[0] == MODINFO_ADDR) + addr = *(caddr_t *)(curp + sizeof(uint32_t) * 2); + else if (hdr[0] == MODINFO_SIZE) + sz = *(uint32_t *)(curp + sizeof(uint32_t) * 2); hdr[0] = MODINFO_EMPTY; + } /* skip to next field */ next = sizeof(uint32_t) * 2 + hdr[1]; diff --git a/freebsd/sys/kern/subr_pcpu.c b/freebsd/sys/kern/subr_pcpu.c index 1b866e3a..0ab77996 100644 --- a/freebsd/sys/kern/subr_pcpu.c +++ b/freebsd/sys/kern/subr_pcpu.c @@ -75,7 +75,7 @@ struct dpcpu_free { TAILQ_ENTRY(dpcpu_free) df_link; }; -static DPCPU_DEFINE(char, modspace[DPCPU_MODMIN]); +DPCPU_DEFINE_STATIC(char, modspace[DPCPU_MODMIN] __aligned(__alignof(void *))); static TAILQ_HEAD(, dpcpu_free) dpcpu_head = TAILQ_HEAD_INITIALIZER(dpcpu_head); static struct sx dpcpu_lock; uintptr_t dpcpu_off[MAXCPU]; diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c index 4c45bcfe..6e719897 100644 --- a/freebsd/sys/kern/subr_prf.c +++ b/freebsd/sys/kern/subr_prf.c @@ -135,10 +135,22 @@ static char *ksprintn(char *nbuf, uintmax_t num, int base, int *len, int upper); static void snprintf_func(int ch, void *arg); #ifndef __rtems__ -static int msgbufmapped; /* Set when safe to use msgbuf */ +static bool msgbufmapped; /* Set when safe to use msgbuf */ int msgbuftrigger; struct msgbuf *msgbufp; +#ifndef BOOT_TAG_SZ +#define BOOT_TAG_SZ 32 +#endif +#ifndef BOOT_TAG +/* Tag used to mark the start of a boot in dmesg */ +#define BOOT_TAG "---<<BOOT>>---" +#endif + +static char current_boot_tag[BOOT_TAG_SZ + 1] = BOOT_TAG; +SYSCTL_STRING(_kern, OID_AUTO, boot_tag, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, + current_boot_tag, 0, "Tag added to dmesg at start of boot"); + static int log_console_output = 1; SYSCTL_INT(_kern, OID_AUTO, log_console_output, CTLFLAG_RWTUN, &log_console_output, 0, "Duplicate console output to the syslog"); @@ -743,6 +755,7 @@ reswitch: switch (ch = (u_char)*fmt++) { padc = '0'; goto reswitch; } + /* FALLTHROUGH */ case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': for (n = 0;; ++fmt) { @@ -1057,14 +1070,22 @@ msgbufinit(void *ptr, int size) { char *cp; static struct msgbuf *oldp = NULL; + bool print_boot_tag; size -= sizeof(*msgbufp); cp = (char *)ptr; + print_boot_tag = !msgbufmapped; + /* Attempt to fetch kern.boot_tag tunable on first mapping */ + if (!msgbufmapped) + TUNABLE_STR_FETCH("kern.boot_tag", current_boot_tag, + sizeof(current_boot_tag)); msgbufp = (struct msgbuf *)(cp + size); msgbuf_reinit(msgbufp, cp, size); if (msgbufmapped && oldp != msgbufp) msgbuf_copy(oldp, msgbufp); - msgbufmapped = 1; + msgbufmapped = true; + if (print_boot_tag && *current_boot_tag != '\0') + printf("%s\n", current_boot_tag); oldp = msgbufp; } diff --git a/freebsd/sys/kern/sys_pipe.c b/freebsd/sys/kern/sys_pipe.c index e527495a..8eb0aad9 100755 --- a/freebsd/sys/kern/sys_pipe.c +++ b/freebsd/sys/kern/sys_pipe.c @@ -572,9 +572,7 @@ pipe(int fildes[2]) * If it fails it will return ENOMEM. */ static int -pipespace_new(cpipe, size) - struct pipe *cpipe; - int size; +pipespace_new(struct pipe *cpipe, int size) { caddr_t buffer; int error, cnt, firstseg; @@ -646,9 +644,7 @@ retry: * Wrapper for pipespace_new() that performs locking assertions. */ static int -pipespace(cpipe, size) - struct pipe *cpipe; - int size; +pipespace(struct pipe *cpipe, int size) { KASSERT(cpipe->pipe_state & PIPE_LOCKFL, @@ -660,9 +656,7 @@ pipespace(cpipe, size) * lock a pipe for I/O, blocking other access */ static __inline int -pipelock(cpipe, catch) - struct pipe *cpipe; - int catch; +pipelock(struct pipe *cpipe, int catch) { int error; @@ -683,8 +677,7 @@ pipelock(cpipe, catch) * unlock a pipe I/O lock */ static __inline void -pipeunlock(cpipe) - struct pipe *cpipe; +pipeunlock(struct pipe *cpipe) { PIPE_LOCK_ASSERT(cpipe, MA_OWNED); @@ -698,8 +691,7 @@ pipeunlock(cpipe) } void -pipeselwakeup(cpipe) - struct pipe *cpipe; +pipeselwakeup(struct pipe *cpipe) { PIPE_LOCK_ASSERT(cpipe, MA_OWNED); @@ -720,9 +712,7 @@ pipeselwakeup(cpipe) * will start out zero'd from the ctor, so we just manage the kmem. */ static void -pipe_create(pipe, backing) - struct pipe *pipe; - int backing; +pipe_create(struct pipe *pipe, int backing) { if (backing) { @@ -744,12 +734,8 @@ pipe_create(pipe, backing) /* ARGSUSED */ static int -pipe_read(fp, uio, active_cred, flags, td) - struct file *fp; - struct uio *uio; - struct ucred *active_cred; - struct thread *td; - int flags; +pipe_read(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) { struct pipe *rpipe; int error; @@ -995,9 +981,7 @@ rtems_bsd_pipe_readv(rtems_libio_t *iop, const struct iovec *iov, * This is similar to a physical write operation. */ static int -pipe_build_write_buffer(wpipe, uio) - struct pipe *wpipe; - struct uio *uio; +pipe_build_write_buffer(struct pipe *wpipe, struct uio *uio) { u_int size; int i; @@ -1041,8 +1025,7 @@ pipe_build_write_buffer(wpipe, uio) * unmap and unwire the process buffer */ static void -pipe_destroy_write_buffer(wpipe) - struct pipe *wpipe; +pipe_destroy_write_buffer(struct pipe *wpipe) { PIPE_LOCK_ASSERT(wpipe, MA_OWNED); @@ -1056,8 +1039,7 @@ pipe_destroy_write_buffer(wpipe) * pages can be freed without loss of data. */ static void -pipe_clone_write_buffer(wpipe) - struct pipe *wpipe; +pipe_clone_write_buffer(struct pipe *wpipe) { struct uio uio; struct iovec iov; @@ -1096,9 +1078,7 @@ pipe_clone_write_buffer(wpipe) * the pipe buffer. Then the direct mapping write is set-up. */ static int -pipe_direct_write(wpipe, uio) - struct pipe *wpipe; - struct uio *uio; +pipe_direct_write(struct pipe *wpipe, struct uio *uio) { int error; @@ -1197,12 +1177,8 @@ error1: #endif static int -pipe_write(fp, uio, active_cred, flags, td) - struct file *fp; - struct uio *uio; - struct ucred *active_cred; - struct thread *td; - int flags; +pipe_write(struct file *fp, struct uio *uio, struct ucred *active_cred, + int flags, struct thread *td) { int error = 0; int desiredsize; @@ -1553,11 +1529,8 @@ rtems_bsd_pipe_writev(rtems_libio_t *iop, const struct iovec *iov, /* ARGSUSED */ #ifndef __rtems__ static int -pipe_truncate(fp, length, active_cred, td) - struct file *fp; - off_t length; - struct ucred *active_cred; - struct thread *td; +pipe_truncate(struct file *fp, off_t length, struct ucred *active_cred, + struct thread *td) { struct pipe *cpipe; int error; @@ -1575,12 +1548,8 @@ pipe_truncate(fp, length, active_cred, td) * we implement a very minimal set of ioctls for compatibility with sockets. */ static int -pipe_ioctl(fp, cmd, data, active_cred, td) - struct file *fp; - u_long cmd; - void *data; - struct ucred *active_cred; - struct thread *td; +pipe_ioctl(struct file *fp, u_long cmd, void *data, struct ucred *active_cred, + struct thread *td) { struct pipe *mpipe = fp->f_data; int error; @@ -1672,11 +1641,8 @@ rtems_bsd_pipe_ioctl(rtems_libio_t *iop, ioctl_command_t request, void *buffer) #endif /* __rtems__ */ static int -pipe_poll(fp, events, active_cred, td) - struct file *fp; - int events; - struct ucred *active_cred; - struct thread *td; +pipe_poll(struct file *fp, int events, struct ucred *active_cred, + struct thread *td) { struct pipe *rpipe; struct pipe *wpipe; @@ -1786,11 +1752,8 @@ rtems_bsd_pipe_poll(rtems_libio_t *iop, int events) */ #ifndef __rtems__ static int -pipe_stat(fp, ub, active_cred, td) - struct file *fp; - struct stat *ub; - struct ucred *active_cred; - struct thread *td; +pipe_stat(struct file *fp, struct stat *ub, struct ucred *active_cred, + struct thread *td) { struct pipe *pipe; #else /* __rtems__ */ @@ -1889,9 +1852,7 @@ rtems_bsd_pipe_stat( /* ARGSUSED */ static int -pipe_close(fp, td) - struct file *fp; - struct thread *td; +pipe_close(struct file *fp, struct thread *td) { #ifndef __rtems__ @@ -1922,12 +1883,8 @@ pipe_chmod(struct file *fp, mode_t mode, struct ucred *active_cred, struct threa } static int -pipe_chown(fp, uid, gid, active_cred, td) - struct file *fp; - uid_t uid; - gid_t gid; - struct ucred *active_cred; - struct thread *td; +pipe_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, + struct thread *td) { struct pipe *cpipe; int error; @@ -1957,8 +1914,7 @@ pipe_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) #endif /* __rtems__ */ static void -pipe_free_kmem(cpipe) - struct pipe *cpipe; +pipe_free_kmem(struct pipe *cpipe) { KASSERT(!mtx_owned(PIPE_MTX(cpipe)), @@ -1988,8 +1944,7 @@ pipe_free_kmem(cpipe) * shutdown the pipe */ static void -pipeclose(cpipe) - struct pipe *cpipe; +pipeclose(struct pipe *cpipe) { struct pipepair *pp; struct pipe *ppipe; diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index ec493c04..cf99c615 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -961,23 +961,14 @@ sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, return (retval); } -int +void sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { - struct mbuf *m, *n, *mlast; - int space; - - SOCKBUF_LOCK_ASSERT(sb); + struct mbuf *m, *mlast; - if (control == NULL) - panic("sbappendcontrol_locked"); - space = m_length(control, &n) + m_length(m0, NULL); - - if (space > sbspace(sb)) - return (0); m_clrprotoflags(m0); - n->m_next = m0; /* concatenate data to control */ + m_last(control)->m_next = m0; SBLASTRECORDCHK(sb); @@ -991,18 +982,15 @@ sbappendcontrol_locked(struct sockbuf *sb, struct mbuf *m0, SBLASTMBUFCHK(sb); SBLASTRECORDCHK(sb); - return (1); } -int +void sbappendcontrol(struct sockbuf *sb, struct mbuf *m0, struct mbuf *control) { - int retval; SOCKBUF_LOCK(sb); - retval = sbappendcontrol_locked(sb, m0, control); + sbappendcontrol_locked(sb, m0, control); SOCKBUF_UNLOCK(sb); - return (retval); } /* @@ -1289,6 +1277,63 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) return (ret); } +struct mbuf * +#ifndef __rtems__ +sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff) +#else /* __rtems__ */ +sbsndptr_noadv(struct sockbuf *sb, u_int off, u_int *moff) +#endif /* __rtems__ */ +{ + struct mbuf *m; + + KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__)); + if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) { + *moff = off; + if (sb->sb_sndptr == NULL) { + sb->sb_sndptr = sb->sb_mb; + sb->sb_sndptroff = 0; + } + return (sb->sb_mb); + } else { + m = sb->sb_sndptr; + off -= sb->sb_sndptroff; + } + *moff = off; + return (m); +} + +void +#ifndef __rtems__ +sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len) +#else /* __rtems__ */ +sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, u_int len) +#endif /* __rtems__ */ +{ + /* + * A small copy was done, advance forward the sb_sbsndptr to cover + * it. + */ + struct mbuf *m; + + if (mb != sb->sb_sndptr) { + /* Did not copyout at the same mbuf */ + return; + } + m = mb; + while (m && (len > 0)) { + if (len >= m->m_len) { + len -= m->m_len; + if (m->m_next) { + sb->sb_sndptroff += m->m_len; + sb->sb_sndptr = m->m_next; + } + m = m->m_next; + } else { + len = 0; + } + } +} + /* * Return the first mbuf and the mbuf data offset for the provided * send offset without changing the "sb_sndptroff" field. diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index e82642e4..3143a392 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -1126,6 +1126,8 @@ soclose(struct socket *so) drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); + if (so->so_dtor != NULL) + so->so_dtor(so); SOCK_LOCK(so); if ((listening = (so->so_options & SO_ACCEPTCONN))) { @@ -2191,7 +2193,6 @@ release: /* * Optimized version of soreceive() for stream (TCP) sockets. - * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, @@ -2206,12 +2207,12 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, return (EINVAL); if (psa != NULL) *psa = NULL; - if (controlp != NULL) - return (EINVAL); if (flagsp != NULL) flags = *flagsp &~ MSG_EOR; else flags = 0; + if (controlp != NULL) + *controlp = NULL; if (flags & MSG_OOB) return (soreceive_rcvoob(so, uio, flags)); if (mp0 != NULL) @@ -2815,6 +2816,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_BROADCAST: case SO_REUSEADDR: case SO_REUSEPORT: + case SO_REUSEPORT_LB: case SO_OOBINLINE: case SO_TIMESTAMP: case SO_BINTIME: @@ -3035,6 +3037,7 @@ sogetopt(struct socket *so, struct sockopt *sopt) case SO_KEEPALIVE: case SO_REUSEADDR: case SO_REUSEPORT: + case SO_REUSEPORT_LB: case SO_BROADCAST: case SO_OOBINLINE: case SO_ACCEPTCONN: @@ -3046,6 +3049,10 @@ integer: error = sooptcopyout(sopt, &optval, sizeof optval); break; + case SO_DOMAIN: + optval = so->so_proto->pr_domain->dom_family; + goto integer; + case SO_TYPE: optval = so->so_type; goto integer; @@ -3867,6 +3874,17 @@ sodupsockaddr(const struct sockaddr *sa, int mflags) } /* + * Register per-socket destructor. + */ +void +sodtor_set(struct socket *so, so_dtor_t *func) +{ + + SOCK_LOCK_ASSERT(so); + so->so_dtor = func; +} + +/* * Register per-socket buffer upcalls. */ void @@ -4027,12 +4045,12 @@ sotoxsocket(struct socket *so, struct xsocket *xso) { xso->xso_len = sizeof *xso; - xso->xso_so = so; + xso->xso_so = (uintptr_t)so; xso->so_type = so->so_type; xso->so_options = so->so_options; xso->so_linger = so->so_linger; xso->so_state = so->so_state; - xso->so_pcb = so->so_pcb; + xso->so_pcb = (uintptr_t)so->so_pcb; xso->xso_protocol = so->so_proto->pr_protocol; xso->xso_family = so->so_proto->pr_domain->dom_family; xso->so_timeo = so->so_timeo; diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index 0872aa62..9c4c52e4 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -60,6 +60,8 @@ __FBSDID("$FreeBSD$"); #include <sys/socketvar.h> #include <sys/syscallsubr.h> #include <sys/uio.h> +#include <sys/un.h> +#include <sys/unpcb.h> #ifdef KTRACE #include <sys/ktrace.h> #endif @@ -831,6 +833,15 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, error = soconnect2(so2, so1); if (error != 0) goto free4; + } else if (so1->so_proto->pr_flags & PR_CONNREQUIRED) { + struct unpcb *unp, *unp2; + unp = sotounpcb(so1); + unp2 = sotounpcb(so2); + /* + * No need to lock the unps, because the sockets are brand-new. + * No other threads can be using them yet + */ + unp_copy_peercred(td, unp, unp2, unp); } finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, &socketops); @@ -1260,7 +1271,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, { struct uio auio; struct iovec *iov; - struct mbuf *m, *control = NULL; + struct mbuf *control, *m; caddr_t ctlbuf; struct file *fp; struct socket *so; @@ -1307,6 +1318,7 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, if (KTRPOINT(td, KTR_GENIO)) ktruio = cloneuio(&auio); #endif + control = NULL; len = auio.uio_resid; error = soreceive(so, &fromsa, &auio, NULL, (mp->msg_control || controlp) ? &control : NULL, @@ -1370,30 +1382,22 @@ kern_recvit(struct thread *td, int s, struct msghdr *mp, enum uio_seg fromseg, control->m_data += sizeof (struct cmsghdr); } #endif + ctlbuf = mp->msg_control; len = mp->msg_controllen; - m = control; mp->msg_controllen = 0; - ctlbuf = mp->msg_control; - - while (m && len > 0) { - unsigned int tocopy; - - if (len >= m->m_len) - tocopy = m->m_len; - else { - mp->msg_flags |= MSG_CTRUNC; - tocopy = len; - } - - if ((error = copyout(mtod(m, caddr_t), - ctlbuf, tocopy)) != 0) + for (m = control; m != NULL && len >= m->m_len; m = m->m_next) { + if ((error = copyout(mtod(m, caddr_t), ctlbuf, + m->m_len)) != 0) goto out; - ctlbuf += tocopy; - len -= tocopy; - m = m->m_next; + ctlbuf += m->m_len; + len -= m->m_len; + mp->msg_controllen += m->m_len; + } + if (m != NULL) { + mp->msg_flags |= MSG_CTRUNC; + m_dispose_extcontrolm(m); } - mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; } out: fdrop(fp, td); @@ -1405,8 +1409,11 @@ out: if (error == 0 && controlp != NULL) *controlp = control; - else if (control) + else if (control != NULL) { + if (error != 0) + m_dispose_extcontrolm(control); m_freem(control); + } return (error); } @@ -2134,3 +2141,51 @@ getsockaddr(struct sockaddr **namp, caddr_t uaddr, size_t len) return (0); #endif /* __rtems__ */ } + +/* + * Dispose of externalized rights from an SCM_RIGHTS message. This function + * should be used in error or truncation cases to avoid leaking file descriptors + * into the recipient's (the current thread's) table. + */ +void +m_dispose_extcontrolm(struct mbuf *m) +{ + struct cmsghdr *cm; + struct file *fp; + struct thread *td; + socklen_t clen, datalen; + int error, fd, *fds, nfd; + + td = curthread; + for (; m != NULL; m = m->m_next) { + if (m->m_type != MT_EXTCONTROL) + continue; + cm = mtod(m, struct cmsghdr *); + clen = m->m_len; + while (clen > 0) { + if (clen < sizeof(*cm)) + panic("%s: truncated mbuf %p", __func__, m); + datalen = CMSG_SPACE(cm->cmsg_len - CMSG_SPACE(0)); + if (clen < datalen) + panic("%s: truncated mbuf %p", __func__, m); + + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_RIGHTS) { + fds = (int *)CMSG_DATA(cm); + nfd = (cm->cmsg_len - CMSG_SPACE(0)) / + sizeof(int); + + while (nfd-- > 0) { + fd = *fds++; + error = fget(td, fd, &cap_no_rights, + &fp); + if (error == 0) + fdclose(td, fp, fd); + } + } + clen -= datalen; + cm = (struct cmsghdr *)((uint8_t *)cm + datalen); + } + m_chtype(m, MT_CONTROL); + } +} diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c index 688682d4..c1885ed6 100644 --- a/freebsd/sys/kern/uipc_usrreq.c +++ b/freebsd/sys/kern/uipc_usrreq.c @@ -376,33 +376,32 @@ unp_pcb_lock2(struct unpcb *unp, struct unpcb *unp2) } static __noinline void -unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, int *freed) - +unp_pcb_owned_lock2_slowpath(struct unpcb *unp, struct unpcb **unp2p, + int *freed) { struct unpcb *unp2; unp2 = *unp2p; - unp_pcb_hold((unp2)); - UNP_PCB_UNLOCK((unp)); - UNP_PCB_LOCK((unp2)); - UNP_PCB_LOCK((unp)); - *freed = unp_pcb_rele((unp2)); + unp_pcb_hold(unp2); + UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK(unp2); + UNP_PCB_LOCK(unp); + *freed = unp_pcb_rele(unp2); if (*freed) *unp2p = NULL; } -#define unp_pcb_owned_lock2(unp, unp2, freed) do { \ - freed = 0; \ - UNP_PCB_LOCK_ASSERT((unp)); \ - UNP_PCB_UNLOCK_ASSERT((unp2)); \ - MPASS(unp != unp2); \ - if (__predict_true(UNP_PCB_TRYLOCK((unp2)))) \ - break; \ - else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \ - UNP_PCB_LOCK((unp2)); \ - else { \ - unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \ - } \ +#define unp_pcb_owned_lock2(unp, unp2, freed) do { \ + freed = 0; \ + UNP_PCB_LOCK_ASSERT(unp); \ + UNP_PCB_UNLOCK_ASSERT(unp2); \ + MPASS((unp) != (unp2)); \ + if (__predict_true(UNP_PCB_TRYLOCK(unp2))) \ + break; \ + else if ((uintptr_t)(unp2) > (uintptr_t)(unp)) \ + UNP_PCB_LOCK(unp2); \ + else \ + unp_pcb_owned_lock2_slowpath((unp), &(unp2), &freed); \ } while (0) @@ -992,21 +991,19 @@ uipc_disconnect(struct socket *so) UNP_PCB_UNLOCK(unp); return (0); } - if (unp == unp2) { - if (unp_pcb_rele(unp) == 0) + if (__predict_true(unp != unp2)) { + unp_pcb_owned_lock2(unp, unp2, freed); + if (__predict_false(freed)) { UNP_PCB_UNLOCK(unp); + return (0); + } + unp_pcb_hold(unp2); } - unp_pcb_owned_lock2(unp, unp2, freed); - if (__predict_false(freed)) { - UNP_PCB_UNLOCK(unp); - return (0); - } - unp_pcb_hold(unp2); unp_pcb_hold(unp); unp_disconnect(unp, unp2); if (unp_pcb_rele(unp) == 0) UNP_PCB_UNLOCK(unp); - if (unp_pcb_rele(unp2) == 0) + if ((unp != unp2) && unp_pcb_rele(unp2) == 0) UNP_PCB_UNLOCK(unp2); return (0); } @@ -1305,16 +1302,22 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, control = unp_addsockcred(td, control); #endif /* __rtems__ */ } + /* - * Send to paired receive port, and then reduce send buffer - * hiwater marks to maintain backpressure. Wake up readers. + * Send to paired receive port and wake up readers. Don't + * check for space available in the receive buffer if we're + * attaching ancillary data; Unix domain sockets only check + * for space in the sending sockbuf, and that check is + * performed one level up the stack. At that level we cannot + * precisely account for the amount of buffer space used + * (e.g., because control messages are not yet internalized). */ switch (so->so_type) { case SOCK_STREAM: if (control != NULL) { - if (sbappendcontrol_locked(&so2->so_rcv, m, - control)) - control = NULL; + sbappendcontrol_locked(&so2->so_rcv, m, + control); + control = NULL; } else sbappend_locked(&so2->so_rcv, m, flags); break; @@ -1323,14 +1326,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, const struct sockaddr *from; from = &sun_noname; - /* - * Don't check for space available in so2->so_rcv. - * Unix domain sockets only check for space in the - * sending sockbuf, and that check is performed one - * level up the stack. - */ if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, - from, m, control)) + from, m, control)) control = NULL; break; } @@ -1396,14 +1393,21 @@ uipc_ready(struct socket *so, struct mbuf *m, int count) unp = sotounpcb(so); - UNP_LINK_RLOCK(); + UNP_PCB_LOCK(unp); if ((unp2 = unp->unp_conn) == NULL) { - UNP_LINK_RUNLOCK(); - for (int i = 0; i < count; i++) - m = m_free(m); - return (ECONNRESET); + UNP_PCB_UNLOCK(unp); + goto error; + } + if (unp != unp2) { + if (UNP_PCB_TRYLOCK(unp2) == 0) { + unp_pcb_hold(unp2); + UNP_PCB_UNLOCK(unp); + UNP_PCB_LOCK(unp2); + if (unp_pcb_rele(unp2)) + goto error; + } else + UNP_PCB_UNLOCK(unp); } - UNP_PCB_LOCK(unp2); so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); @@ -1413,9 +1417,12 @@ uipc_ready(struct socket *so, struct mbuf *m, int count) SOCKBUF_UNLOCK(&so2->so_rcv); UNP_PCB_UNLOCK(unp2); - UNP_LINK_RUNLOCK(); return (error); + error: + for (int i = 0; i < count; i++) + m = m_free(m); + return (ECONNRESET); } static int @@ -1778,24 +1785,8 @@ unp_connectat(int fd, struct socket *so, struct sockaddr *nam, sa = NULL; } - /* - * The connector's (client's) credentials are copied from its - * process structure at the time of connect() (which is now). - */ - cru2x(td->td_ucred, &unp3->unp_peercred); - unp3->unp_flags |= UNP_HAVEPC; + unp_copy_peercred(td, unp3, unp, unp2); - /* - * The receiver's (server's) credentials are copied from the - * unp_peercred member of socket on which the former called - * listen(); uipc_listen() cached that process's credentials - * at that time so we can use them now. - */ - memcpy(&unp->unp_peercred, &unp2->unp_peercred, - sizeof(unp->unp_peercred)); - unp->unp_flags |= UNP_HAVEPC; - if (unp2->unp_flags & UNP_WANTCRED) - unp3->unp_flags |= UNP_WANTCRED; UNP_PCB_UNLOCK(unp2); unp2 = unp3; unp_pcb_owned_lock2(unp2, unp, freed); @@ -1838,6 +1829,27 @@ bad: return (error); } +/* + * Set socket peer credentials at connection time. + * + * The client's PCB credentials are copied from its process structure. The + * server's PCB credentials are copied from the socket on which it called + * listen(2). uipc_listen cached that process's credentials at the time. + */ +void +unp_copy_peercred(struct thread *td, struct unpcb *client_unp, + struct unpcb *server_unp, struct unpcb *listen_unp) +{ + cru2x(td->td_ucred, &client_unp->unp_peercred); + client_unp->unp_flags |= UNP_HAVEPC; + + memcpy(&server_unp->unp_peercred, &listen_unp->unp_peercred, + sizeof(server_unp->unp_peercred)); + server_unp->unp_flags |= UNP_HAVEPC; + if (listen_unp->unp_flags & UNP_WANTCRED) + client_unp->unp_flags |= UNP_WANTCRED; +} + static int unp_connect2(struct socket *so, struct socket *so2, int req) { @@ -2026,7 +2038,7 @@ unp_pcblist(SYSCTL_HANDLER_ARGS) if (freeunp == 0 && unp->unp_gencnt <= gencnt) { xu->xu_len = sizeof *xu; - xu->xu_unpp = unp; + xu->xu_unpp = (uintptr_t)unp; /* * XXX - need more locking here to protect against * connect/disconnect races for SMP. @@ -2043,10 +2055,10 @@ unp_pcblist(SYSCTL_HANDLER_ARGS) unp->unp_conn->unp_addr->sun_len); else bzero(&xu->xu_caddr, sizeof(xu->xu_caddr)); - xu->unp_vnode = unp->unp_vnode; - xu->unp_conn = unp->unp_conn; - xu->xu_firstref = LIST_FIRST(&unp->unp_refs); - xu->xu_nextref = LIST_NEXT(unp, unp_reflink); + xu->unp_vnode = (uintptr_t)unp->unp_vnode; + xu->unp_conn = (uintptr_t)unp->unp_conn; + xu->xu_firstref = (uintptr_t)LIST_FIRST(&unp->unp_refs); + xu->xu_nextref = (uintptr_t)LIST_NEXT(unp, unp_reflink); xu->unp_gencnt = unp->unp_gencnt; sotoxsocket(unp->unp_socket, &xu->xu_socket); UNP_PCB_UNLOCK(unp); @@ -2220,6 +2232,13 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp, int flags) &fdep[i]->fde_caps); unp_externalize_fp(fdep[i]->fde_file); } + + /* + * The new type indicates that the mbuf data refers to + * kernel resources that may need to be released before + * the mbuf is freed. + */ + m_chtype(*controlp, MT_EXTCONTROL); FILEDESC_XUNLOCK(fdesc); free(fdep[0], M_FILECAPS); } else { |