diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-09 14:47:04 +0100 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-01-10 11:03:27 +0100 |
commit | 0577772720a4ecb050a230f75346f90b246e93c8 (patch) | |
tree | a3fba8bb57e77c932e08dd0d4bbe49adb6312e5e /freebsd/sys/kern | |
parent | Update to FreeBSD head 2016-12-10 (diff) | |
download | rtems-libbsd-0577772720a4ecb050a230f75346f90b246e93c8.tar.bz2 |
Update to FreeBSD head 2017-01-09
Git mirror commit 1f8e4a995a6ede4bdb24e6d335ccda2bdb0175ab.
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r-- | freebsd/sys/kern/kern_event.c | 272 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_synch.c | 3 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_sysctl.c | 36 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_bus.c | 4 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_counter.c | 56 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_sleepqueue.c | 11 | ||||
-rw-r--r-- | freebsd/sys/kern/sys_socket.c | 2 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 2 |
8 files changed, 250 insertions, 136 deletions
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index ca9c1de7..473414ae 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -50,7 +50,6 @@ __FBSDID("$FreeBSD$"); #include <sys/fcntl.h> #include <sys/kthread.h> #include <sys/selinfo.h> -#include <sys/stdatomic.h> #include <sys/queue.h> #include <sys/event.h> #include <sys/eventvar.h> @@ -71,13 +70,12 @@ __FBSDID("$FreeBSD$"); #ifdef KTRACE #include <sys/ktrace.h> #endif +#include <machine/atomic.h> #include <vm/uma.h> #ifdef __rtems__ #include <machine/rtems-bsd-syscall-api.h> -#define KN_INFLUX 0x10 /* knote is in flux */ - /* Maintain a global kqueue list on RTEMS */ static struct kqlist fd_kqlist; #endif /* __rtems__ */ @@ -153,6 +151,7 @@ static const rtems_filesystem_file_handlers_r kqueueops; static int knote_attach(struct knote *kn, struct kqueue *kq); static void knote_drop(struct knote *kn, struct thread *td); +static void knote_drop_detached(struct knote *kn, struct thread *td); static void knote_enqueue(struct knote *kn); static void knote_dequeue(struct knote *kn); static void knote_init(void); @@ -209,12 +208,12 @@ static struct filterops user_filtops = { }; static uma_zone_t knote_zone; -static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0); +static unsigned int kq_ncallouts = 0; static unsigned int kq_calloutmax = 4 * 1024; SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); -/* XXX - ensure not KN_INFLUX?? */ +/* XXX - ensure not influx ? */ #define KNOTE_ACTIVATE(kn, islock) do { \ if ((islock)) \ mtx_assert(&(kn)->kn_kq->kq_lock, MA_OWNED); \ @@ -275,6 +274,32 @@ kn_list_unlock(struct knlist *knl) } } +static bool +kn_in_flux(struct knote *kn) +{ + + return (kn->kn_influx > 0); +} + +static void +kn_enter_flux(struct knote *kn) +{ + + KQ_OWNED(kn->kn_kq); + MPASS(kn->kn_influx < INT_MAX); + kn->kn_influx++; +} + +static bool +kn_leave_flux(struct knote *kn) +{ + + KQ_OWNED(kn->kn_kq); + MPASS(kn->kn_influx > 0); + kn->kn_influx--; + return (kn->kn_influx == 0); +} + #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ @@ -538,7 +563,7 @@ knote_fork(struct knlist *list, int pid) SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { KQ_UNLOCK(kq); continue; } @@ -561,7 +586,7 @@ knote_fork(struct knlist *list, int pid) * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); list->kl_unlock(list->kl_lockarg); @@ -601,7 +626,7 @@ knote_fork(struct knlist *list, int pid) if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 0); KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; + kn_leave_flux(kn); KQ_UNLOCK_FLUX(kq); list->kl_lock(list->kl_lockarg); } @@ -632,7 +657,7 @@ timer2sbintime(intptr_t data, int flags) case NOTE_SECONDS: #ifdef __LP64__ if (data > (SBT_MAX / SBT_1S)) - return SBT_MAX; + return (SBT_MAX); #endif return ((sbintime_t)data << 32); case NOTE_MSECONDS: /* FALLTHROUGH */ @@ -641,7 +666,7 @@ timer2sbintime(intptr_t data, int flags) int64_t secs = data / 1000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) - return SBT_MAX; + return (SBT_MAX); #endif return (secs << 32 | MS_TO_SBT(data % 1000)); } @@ -651,7 +676,7 @@ timer2sbintime(intptr_t data, int flags) int64_t secs = data / 1000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) - return SBT_MAX; + return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000)); } @@ -661,34 +686,40 @@ timer2sbintime(intptr_t data, int flags) int64_t secs = data / 1000000000; #ifdef __LP64__ if (secs > (SBT_MAX / SBT_1S)) - return SBT_MAX; + return (SBT_MAX); #endif return (secs << 32 | US_TO_SBT(data % 1000000000)); } - return NS_TO_SBT(data); + return (NS_TO_SBT(data)); default: break; } return (-1); } +struct kq_timer_cb_data { + struct callout c; + sbintime_t next; /* next timer event fires at */ + sbintime_t to; /* precalculated timer period */ +}; + static void filt_timerexpire(void *knx) { - struct callout *calloutp; struct knote *kn; + struct kq_timer_cb_data *kc; kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ - if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { - calloutp = (struct callout *)kn->kn_hook; - *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata, - kn->kn_sfflags); - callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, - filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); - } + if ((kn->kn_flags & EV_ONESHOT) != 0) + return; + + kc = kn->kn_ptr.p_v; + kc->next += kc->to; + callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, + PCPU_GET(cpuid), C_ABSOLUTE); } /* @@ -697,39 +728,36 @@ filt_timerexpire(void *knx) static int filt_timerattach(struct knote *kn) { - struct callout *calloutp; + struct kq_timer_cb_data *kc; sbintime_t to; unsigned int ncallouts; - if ((intptr_t)kn->kn_sdata < 0) + if (kn->kn_sdata < 0) return (EINVAL); - if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) + if (kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) kn->kn_sdata = 1; /* Only precision unit are supported in flags so far */ - if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) + if ((kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) != 0) return (EINVAL); to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); if (to < 0) return (EINVAL); - ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed); do { + ncallouts = kq_ncallouts; if (ncallouts >= kq_calloutmax) return (ENOMEM); - } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts, - &ncallouts, ncallouts + 1, memory_order_relaxed, - memory_order_relaxed)); + } while (!atomic_cmpset_int(&kq_ncallouts, ncallouts, ncallouts + 1)); kn->kn_flags |= EV_CLEAR; /* automatically set */ kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ - kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK); - calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); - callout_init(calloutp, 1); - kn->kn_hook = calloutp; - *kn->kn_ptr.p_nexttime = to + sbinuptime(); - callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, - filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); + kn->kn_ptr.p_v = kc = malloc(sizeof(*kc), M_KQUEUE, M_WAITOK); + callout_init(&kc->c, 1); + kc->next = to + sbinuptime(); + kc->to = to; + callout_reset_sbt_on(&kc->c, kc->next, 0, filt_timerexpire, kn, + PCPU_GET(cpuid), C_ABSOLUTE); return (0); } @@ -737,14 +765,13 @@ filt_timerattach(struct knote *kn) static void filt_timerdetach(struct knote *kn) { - struct callout *calloutp; + struct kq_timer_cb_data *kc; unsigned int old; - calloutp = (struct callout *)kn->kn_hook; - callout_drain(calloutp); - free(calloutp, M_KQUEUE); - free(kn->kn_ptr.p_nexttime, M_KQUEUE); - old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed); + kc = kn->kn_ptr.p_v; + callout_drain(&kc->c); + free(kc, M_KQUEUE); + old = atomic_fetchadd_int(&kq_ncallouts, -1); KASSERT(old > 0, ("Number of callouts cannot become negative")); kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } @@ -1386,7 +1413,7 @@ findkn: } /* knote is in the process of changing, wait for it to stabilize. */ - if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn != NULL && kn_in_flux(kn)) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { FILEDESC_XUNLOCK(td->td_proc->p_fd); @@ -1430,7 +1457,8 @@ findkn: kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); - kn->kn_status = KN_INFLUX|KN_DETACHED; + kn->kn_status = KN_DETACHED; + kn_enter_flux(kn); error = knote_attach(kn, kq); KQ_UNLOCK(kq); @@ -1440,7 +1468,7 @@ findkn: } if ((error = kn->kn_fop->f_attach(kn)) != 0) { - knote_drop(kn, td); + knote_drop_detached(kn, td); goto done; } knl = kn_list_lock(kn); @@ -1454,10 +1482,8 @@ findkn: } if (kev->flags & EV_DELETE) { - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); knote_drop(kn, td); goto done; } @@ -1472,7 +1498,8 @@ findkn: * but doing so will not reset any filter which has already been * triggered. */ - kn->kn_status |= KN_INFLUX | KN_SCAN; + kn->kn_status |= KN_SCAN; + kn_enter_flux(kn); KQ_UNLOCK(kq); knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; @@ -1507,7 +1534,8 @@ done_ev_add: if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == KN_ACTIVE) knote_enqueue(kn); - kn->kn_status &= ~(KN_INFLUX | KN_SCAN); + kn->kn_status &= ~KN_SCAN; + kn_leave_flux(kn); kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); @@ -1674,7 +1702,7 @@ kqueue_task(void *arg, int pending) /* * Scan, update kn_data (if not ONESHOT), and copyout triggered events. - * We treat KN_MARKER knotes as if they are INFLUX. + * We treat KN_MARKER knotes as if they are in flux. */ static int kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, @@ -1748,7 +1776,7 @@ retry: kn = TAILQ_FIRST(&kq->kq_head); if ((kn->kn_status == KN_MARKER && kn != marker) || - (kn->kn_status & KN_INFLUX) == KN_INFLUX) { + kn_in_flux(kn)) { if (influx) { influx = 0; KQ_FLUX_WAKEUP(kq); @@ -1771,40 +1799,37 @@ retry: goto retry; goto done; } - KASSERT((kn->kn_status & KN_INFLUX) == 0, - ("KN_INFLUX set when not suppose to be")); + KASSERT(!kn_in_flux(kn), + ("knote %p is unexpectedly in flux", kn)); if ((kn->kn_flags & EV_DROP) == EV_DROP) { kn->kn_status &= ~KN_QUEUED; - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* - * We don't need to lock the list since we've marked - * it _INFLUX. + * We don't need to lock the list since we've + * marked it as in flux. */ - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); continue; } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); kq->kq_count--; KQ_UNLOCK(kq); /* - * We don't need to lock the list since we've marked - * it _INFLUX. + * We don't need to lock the list since we've + * marked the knote as being in flux. */ *kevp = kn->kn_kevent; - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); kn = NULL; } else { - kn->kn_status |= KN_INFLUX | KN_SCAN; + kn->kn_status |= KN_SCAN; + kn_enter_flux(kn); KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); @@ -1812,9 +1837,9 @@ retry: if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); - kn->kn_status &= - ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | + kn->kn_status &= ~(KN_QUEUED | KN_ACTIVE | KN_SCAN); + kn_leave_flux(kn); kq->kq_count--; kn_list_unlock(knl); influx = 1; @@ -1844,7 +1869,8 @@ retry: } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kn->kn_status &= ~(KN_INFLUX | KN_SCAN); + kn->kn_status &= ~KN_SCAN; + kn_leave_flux(kn); kn_list_unlock(knl); influx = 1; } @@ -2019,15 +2045,13 @@ kqueue_drain(struct kqueue *kq, struct thread *td) for (i = 0; i < kq->kq_knlistsize; i++) { while ((kn = SLIST_FIRST(&kq->kq_knlist[i])) != NULL) { - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo1", 0); continue; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); } @@ -2035,16 +2059,14 @@ kqueue_drain(struct kqueue *kq, struct thread *td) if (kq->kq_knhashmask != 0) { for (i = 0; i <= kq->kq_knhashmask; i++) { while ((kn = SLIST_FIRST(&kq->kq_knhash[i])) != NULL) { - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if (kn_in_flux(kn)) { kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK, "kqclo2", 0); continue; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); knote_drop(kn, td); KQ_LOCK(kq); } @@ -2205,7 +2227,7 @@ knote(struct knlist *list, long hint, int lockflags) list->kl_lock(list->kl_lockarg); /* - * If we unlock the list lock (and set KN_INFLUX), we can + * If we unlock the list lock (and enter influx), we can * eliminate the kqueue scheduling, but this will introduce * four lock/unlock's for each knote to test. Also, marker * would be needed to keep iteration position, since filters @@ -2214,7 +2236,7 @@ knote(struct knlist *list, long hint, int lockflags) SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + if (kn_in_flux(kn) && (kn->kn_status & KN_SCAN) == 0) { /* * Do not process the influx notes, except for * the influx coming from the kq unlock in the @@ -2225,11 +2247,11 @@ knote(struct knlist *list, long hint, int lockflags) */ KQ_UNLOCK(kq); } else if ((lockflags & KNF_NOKQLOCK) != 0) { - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; + kn_leave_flux(kn); if (error) KNOTE_ACTIVATE(kn, 1); KQ_UNLOCK_FLUX(kq); @@ -2251,10 +2273,12 @@ knote(struct knlist *list, long hint, int lockflags) void knlist_add(struct knlist *knl, struct knote *kn, int islocked) { + KNL_ASSERT_LOCK(knl, islocked); KQ_NOTOWNED(kn->kn_kq); - KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == - (KN_INFLUX|KN_DETACHED), ("knote not KN_INFLUX and KN_DETACHED")); + KASSERT(kn_in_flux(kn), ("knote %p not in flux", kn)); + KASSERT((kn->kn_status & KN_DETACHED) != 0, + ("knote %p was not detached", kn)); if (!islocked) knl->kl_lock(knl->kl_lockarg); SLIST_INSERT_HEAD(&knl->kl_list, kn, kn_selnext); @@ -2270,12 +2294,13 @@ static void knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) { - KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); + + KASSERT(!kqislocked || knlislocked, ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); mtx_assert(&kn->kn_kq->kq_lock, kqislocked ? MA_OWNED : MA_NOTOWNED); - if (!kqislocked) - KASSERT((kn->kn_status & (KN_INFLUX|KN_DETACHED)) == KN_INFLUX, - ("knlist_remove called w/o knote being KN_INFLUX or already removed")); + KASSERT(kqislocked || kn_in_flux(kn), ("knote %p not in flux", kn)); + KASSERT((kn->kn_status & KN_DETACHED) == 0, + ("knote %p was already detached", kn)); if (!knlislocked) knl->kl_lock(knl->kl_lockarg); SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); @@ -2304,12 +2329,12 @@ knlist_empty(struct knlist *knl) { KNL_ASSERT_LOCKED(knl); - return SLIST_EMPTY(&knl->kl_list); + return (SLIST_EMPTY(&knl->kl_list)); } -static struct mtx knlist_lock; +static struct mtx knlist_lock; MTX_SYSINIT(knlist_lock, &knlist_lock, "knlist lock for lockless objects", - MTX_DEF); + MTX_DEF); static void knlist_mtx_lock(void *arg); static void knlist_mtx_unlock(void *arg); @@ -2434,17 +2459,8 @@ void knlist_destroy(struct knlist *knl) { -#ifdef INVARIANTS - /* - * if we run across this error, we need to find the offending - * driver and have it call knlist_clear or knlist_delete. - */ - if (!SLIST_EMPTY(&knl->kl_list)) - printf("WARNING: destroying knlist w/ knotes on it!\n"); -#endif - - knl->kl_lockarg = knl->kl_lock = knl->kl_unlock = NULL; - SLIST_INIT(&knl->kl_list); + KASSERT(KNLIST_EMPTY(knl), + ("destroying knlist %p with knotes on it", knl)); } void @@ -2481,30 +2497,29 @@ again: /* need to reacquire lock since we have dropped it */ SLIST_FOREACH_SAFE(kn, &knl->kl_list, kn_selnext, kn2) { kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX)) { + if (kn_in_flux(kn)) { KQ_UNLOCK(kq); continue; } knlist_remove_kq(knl, kn, 1, 1); if (killkn) { - kn->kn_status |= KN_INFLUX | KN_DETACHED; + kn_enter_flux(kn); KQ_UNLOCK(kq); - knote_drop(kn, td); + knote_drop_detached(kn, td); } else { /* Make sure cleared knotes disappear soon */ - kn->kn_flags |= (EV_EOF | EV_ONESHOT); + kn->kn_flags |= EV_EOF | EV_ONESHOT; KQ_UNLOCK(kq); } kq = NULL; } if (!SLIST_EMPTY(&knl->kl_list)) { - /* there are still KN_INFLUX remaining */ + /* there are still in flux knotes remaining */ kn = SLIST_FIRST(&knl->kl_list); kq = kn->kn_kq; KQ_LOCK(kq); - KASSERT(kn->kn_status & KN_INFLUX, - ("knote removed w/o list lock")); + KASSERT(kn_in_flux(kn), ("knote removed w/o list lock")); knl->kl_unlock(knl->kl_lockarg); kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqkclr", 0); @@ -2556,7 +2571,7 @@ again: influx = 0; while (kq->kq_knlistsize > fd && (kn = SLIST_FIRST(&kq->kq_knlist[fd])) != NULL) { - if (kn->kn_status & KN_INFLUX) { + if (kn_in_flux(kn)) { /* someone else might be waiting on our knote */ if (influx) wakeup(kq); @@ -2564,12 +2579,10 @@ again: msleep(kq, &kq->kq_lock, PSOCK, "kqflxwt", 0); goto again; } - kn->kn_status |= KN_INFLUX; + kn_enter_flux(kn); KQ_UNLOCK(kq); - if (!(kn->kn_status & KN_DETACHED)) - kn->kn_fop->f_detach(kn); - knote_drop(kn, td); influx = 1; + knote_drop(kn, td); KQ_LOCK(kq); } KQ_UNLOCK_FLUX(kq); @@ -2584,42 +2597,47 @@ knote_attach(struct knote *kn, struct kqueue *kq) { struct klist *list; - KASSERT(kn->kn_status & KN_INFLUX, ("knote not marked INFLUX")); + KASSERT(kn_in_flux(kn), ("knote %p not marked influx", kn)); KQ_OWNED(kq); if (kn->kn_fop->f_isfd) { if (kn->kn_id >= kq->kq_knlistsize) - return ENOMEM; + return (ENOMEM); list = &kq->kq_knlist[kn->kn_id]; } else { if (kq->kq_knhash == NULL) - return ENOMEM; + return (ENOMEM); list = &kq->kq_knhash[KN_HASH(kn->kn_id, kq->kq_knhashmask)]; } - SLIST_INSERT_HEAD(list, kn, kn_link); - - return 0; + return (0); } -/* - * knote must already have been detached using the f_detach method. - * no lock need to be held, it is assumed that the KN_INFLUX flag is set - * to prevent other removal. - */ static void knote_drop(struct knote *kn, struct thread *td) { + + if ((kn->kn_status & KN_DETACHED) == 0) + kn->kn_fop->f_detach(kn); + knote_drop_detached(kn, td); +} + +static void +knote_drop_detached(struct knote *kn, struct thread *td) +{ struct kqueue *kq; struct klist *list; kq = kn->kn_kq; + KASSERT((kn->kn_status & KN_DETACHED) != 0, + ("knote %p still attached", kn)); KQ_NOTOWNED(kq); - KASSERT((kn->kn_status & KN_INFLUX) == KN_INFLUX, - ("knote_drop called without KN_INFLUX set in kn_status")); KQ_LOCK(kq); + KASSERT(kn->kn_influx == 1, + ("knote_drop called on %p with influx %d", kn, kn->kn_influx)); + if (kn->kn_fop->f_isfd) list = &kq->kq_knlist[kn->kn_id]; else @@ -2709,13 +2727,11 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) goto noacquire; error = kqueue_register(kq, kev, td, waitok); - kqueue_release(kq, 0); noacquire: fdrop(fp, td); - - return error; + return (error); } #ifdef __rtems__ static const rtems_filesystem_file_handlers_r kqueueops = { diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c index 6a8e7754..7109e798 100644 --- a/freebsd/sys/kern/kern_synch.c +++ b/freebsd/sys/kern/kern_synch.c @@ -353,7 +353,8 @@ pause_sbt(const char *wmesg, sbintime_t sbt, sbintime_t pr, int flags) sbt = tick_sbt; #ifndef __rtems__ - if (cold || kdb_active || SCHEDULER_STOPPED()) { + if ((cold && curthread == &thread0) || kdb_active || + SCHEDULER_STOPPED()) { /* * We delay one second at a time to avoid overflowing the * system specific DELAY() function(s): diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 07ddded9..9553f4c7 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -684,6 +684,9 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) if (oidp->oid_descr) free(__DECONST(char *, oidp->oid_descr), M_SYSCTLOID); + if (oidp->oid_label) + free(__DECONST(char *, oidp->oid_label), + M_SYSCTLOID); free(__DECONST(char *, oidp->oid_name), M_SYSCTLOID); free(oidp, M_SYSCTLOID); } @@ -697,7 +700,8 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) struct sysctl_oid * sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, int number, const char *name, int kind, void *arg1, intmax_t arg2, - int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) + int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr, + const char *label) { struct sysctl_oid *oidp; @@ -734,6 +738,8 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, oidp->oid_fmt = fmt; if (descr != NULL) oidp->oid_descr = strdup(descr, M_SYSCTLOID); + if (label != NULL) + oidp->oid_label = strdup(label, M_SYSCTLOID); /* Update the context, if used */ if (clist != NULL) sysctl_ctx_entry_add(clist, oidp); @@ -821,7 +827,8 @@ SYSINIT(sysctl, SI_SUB_KMEM, SI_ORDER_FIRST, sysctl_register_all, 0); * {0,2,...} return the next OID. * {0,3} return the OID of the name in "new" * {0,4,...} return the kind & format info for the "..." OID. - * {0,5,...} return the description the "..." OID. + * {0,5,...} return the description of the "..." OID. + * {0,6,...} return the aggregation label of the "..." OID. */ #ifdef SYSCTL_DEBUG @@ -1182,6 +1189,31 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oiddescr, ""); +static int +sysctl_sysctl_oidlabel(SYSCTL_HANDLER_ARGS) +{ + struct sysctl_oid *oid; + struct rm_priotracker tracker; + int error; + + SYSCTL_RLOCK(&tracker); + error = sysctl_find_oid(arg1, arg2, &oid, NULL, req); + if (error) + goto out; + + if (oid->oid_label == NULL) { + error = ENOENT; + goto out; + } + error = SYSCTL_OUT(req, oid->oid_label, strlen(oid->oid_label) + 1); + out: + SYSCTL_RUNLOCK(&tracker); + return (error); +} + +static SYSCTL_NODE(_sysctl, 6, oidlabel, + CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, sysctl_sysctl_oidlabel, ""); + /* * Default "handler" functions. */ diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index 1175456c..e8339c93 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -292,10 +292,10 @@ device_sysctl_init(device_t dev) return; devclass_sysctl_init(dc); sysctl_ctx_init(&dev->sysctl_ctx); - dev->sysctl_tree = SYSCTL_ADD_NODE(&dev->sysctl_ctx, + dev->sysctl_tree = SYSCTL_ADD_NODE_WITH_LABEL(&dev->sysctl_ctx, SYSCTL_CHILDREN(dc->sysctl_tree), OID_AUTO, dev->nameunit + strlen(dc->name), - CTLFLAG_RD, NULL, ""); + CTLFLAG_RD, NULL, "", "device_index"); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), OID_AUTO, "%desc", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_DESC, device_sysctl_handler, "A", diff --git a/freebsd/sys/kern/subr_counter.c b/freebsd/sys/kern/subr_counter.c index 2625b179..c34f1cac 100644 --- a/freebsd/sys/kern/subr_counter.c +++ b/freebsd/sys/kern/subr_counter.c @@ -121,3 +121,59 @@ sysctl_handle_counter_u64_array(SYSCTL_HANDLER_ARGS) return (0); } + +/* + * MP-friendly version of ppsratecheck(). + * + * Returns non-negative if we are in the rate, negative otherwise. + * 0 - rate limit not reached. + * -1 - rate limit reached. + * >0 - rate limit was reached before, and was just reset. The return value + * is number of events since last reset. + */ +int64_t +counter_ratecheck(struct counter_rate *cr, int64_t limit) +{ + int64_t val; + int now; + + val = cr->cr_over; + now = ticks; + + if (now - cr->cr_ticks >= hz) { + /* + * Time to clear the structure, we are in the next second. + * First try unlocked read, and then proceed with atomic. + */ + if ((cr->cr_lock == 0) && + atomic_cmpset_acq_int(&cr->cr_lock, 0, 1)) { + /* + * Check if other thread has just went through the + * reset sequence before us. + */ + if (now - cr->cr_ticks >= hz) { + val = counter_u64_fetch(cr->cr_rate); + counter_u64_zero(cr->cr_rate); + cr->cr_over = 0; + cr->cr_ticks = now; + if (val <= limit) + val = 0; + } + atomic_store_rel_int(&cr->cr_lock, 0); + } else + /* + * We failed to lock, in this case other thread may + * be running counter_u64_zero(), so it is not safe + * to do an update, we skip it. + */ + return (val); + } + + counter_u64_add(cr->cr_rate, 1); + if (cr->cr_over != 0) + return (-1); + if (counter_u64_fetch(cr->cr_rate) > limit) + val = cr->cr_over = -1; + + return (val); +} diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c index a42c62f8..d0288d67 100644 --- a/freebsd/sys/kern/subr_sleepqueue.c +++ b/freebsd/sys/kern/subr_sleepqueue.c @@ -1111,7 +1111,7 @@ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; - struct thread *td; + struct thread *td, *tdn; int wakeup_swapper; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); @@ -1123,9 +1123,14 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue) KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); - /* Resume all blocked threads on the sleep queue. */ + /* + * Resume all blocked threads on the sleep queue. The last thread will + * be given ownership of sq and may re-enqueue itself before + * sleepq_resume_thread() returns, so we must cache the "next" queue + * item at the beginning of the final iteration. + */ wakeup_swapper = 0; - while ((td = TAILQ_FIRST(&sq->sq_blocked[queue])) != NULL) { + TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c index a2537652..19567b5f 100644 --- a/freebsd/sys/kern/sys_socket.c +++ b/freebsd/sys/kern/sys_socket.c @@ -774,6 +774,8 @@ retry: if (td->td_ru.ru_msgrcv != ru_before) job->msgrcv = 1; } else { + if (!TAILQ_EMPTY(&sb->sb_aiojobq)) + flags |= MSG_MORETOCOME; uio.uio_rw = UIO_WRITE; ru_before = td->td_ru.ru_msgsnd; #ifdef MAC diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index c086ee7c..5f01844d 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -1207,6 +1207,7 @@ sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME */ + (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { @@ -1395,6 +1396,7 @@ restart: (resid <= 0)) ? PRUS_EOF : /* If there is more to send set PRUS_MORETOCOME. */ + (flags & MSG_MORETOCOME) || (resid > 0 && space > 0) ? PRUS_MORETOCOME : 0, top, addr, control, td); if (dontroute) { |