diff options
Diffstat (limited to 'freebsd/sys/kern/kern_event.c')
-rw-r--r-- | freebsd/sys/kern/kern_event.c | 831 |
1 files changed, 546 insertions, 285 deletions
diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 50f4c696..ca9c1de7 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -32,13 +32,15 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/local/opt_ktrace.h> +#include <rtems/bsd/local/opt_kqueue.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> -#include <sys/capability.h> +#include <sys/capsicum.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> +#include <sys/rwlock.h> #include <sys/proc.h> #include <sys/malloc.h> #include <rtems/bsd/sys/unistd.h> @@ -48,11 +50,13 @@ __FBSDID("$FreeBSD$"); #include <sys/fcntl.h> #include <sys/kthread.h> #include <sys/selinfo.h> +#include <sys/stdatomic.h> #include <sys/queue.h> #include <sys/event.h> #include <sys/eventvar.h> #include <sys/poll.h> #include <sys/protosw.h> +#include <sys/resourcevar.h> #include <sys/sigio.h> #include <sys/signalvar.h> #include <sys/socket.h> @@ -63,6 +67,7 @@ __FBSDID("$FreeBSD$"); #include <sys/syscallsubr.h> #include <sys/taskqueue.h> #include <sys/uio.h> +#include <sys/user.h> #ifdef KTRACE #include <sys/ktrace.h> #endif @@ -96,7 +101,7 @@ MTX_SYSINIT(kq_global, &kq_global, "kqueue order", MTX_DEF); haslck = 0; \ } while (0) -TASKQUEUE_DEFINE_THREAD(kqueue); +TASKQUEUE_DEFINE_THREAD(kqueue_ctx); static int kevent_copyout(void *arg, struct kevent *kevp, int count); static int kevent_copyin(void *arg, struct kevent *kevp, int count); @@ -104,6 +109,8 @@ static int kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int waitok); static int kqueue_acquire(struct file *fp, struct kqueue **kqp); static void kqueue_release(struct kqueue *kq, int locked); +static void kqueue_destroy(struct kqueue *kq); +static void kqueue_drain(struct kqueue *kq, struct thread *td); static int kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, int waitok); static void kqueue_task(void *arg, int pending); @@ -124,11 +131,12 @@ static fo_poll_t kqueue_poll; static fo_kqfilter_t kqueue_kqfilter; static fo_stat_t kqueue_stat; static fo_close_t kqueue_close; +static fo_fill_kinfo_t kqueue_fill_kinfo; static struct fileops kqueueops = { - .fo_read = kqueue_read, - .fo_write = kqueue_write, - .fo_truncate = kqueue_truncate, + .fo_read = invfo_rdwr, + .fo_write = invfo_rdwr, + .fo_truncate = invfo_truncate, .fo_ioctl = kqueue_ioctl, .fo_poll = kqueue_poll, .fo_kqfilter = kqueue_kqfilter, @@ -136,6 +144,8 @@ static struct fileops kqueueops = { .fo_close = kqueue_close, .fo_chmod = invfo_chmod, .fo_chown = invfo_chown, + .fo_sendfile = invfo_sendfile, + .fo_fill_kinfo = kqueue_fill_kinfo, }; #else /* __rtems__ */ static const rtems_filesystem_file_handlers_r kqueueops; @@ -199,9 +209,9 @@ static struct filterops user_filtops = { }; static uma_zone_t knote_zone; -static int kq_ncallouts = 0; -static int kq_calloutmax = (4 * 1024); -SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, +static atomic_uint kq_ncallouts = ATOMIC_VAR_INIT(0); +static unsigned int kq_calloutmax = 4 * 1024; +SYSCTL_UINT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, &kq_calloutmax, 0, "Maximum number of callouts allocated for kqueue"); /* XXX - ensure not KN_INFLUX?? */ @@ -238,14 +248,33 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, #define KQ_NOTOWNED(kq) do { \ mtx_assert(&(kq)->kq_lock, MA_NOTOWNED); \ } while (0) -#define KN_LIST_LOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_lock(kn->kn_knlist->kl_lockarg); \ -} while (0) -#define KN_LIST_UNLOCK(kn) do { \ - if (kn->kn_knlist != NULL) \ - kn->kn_knlist->kl_unlock(kn->kn_knlist->kl_lockarg); \ -} while (0) + +static struct knlist * +kn_list_lock(struct knote *kn) +{ + struct knlist *knl; + + knl = kn->kn_knlist; + if (knl != NULL) + knl->kl_lock(knl->kl_lockarg); + return (knl); +} + +static void +kn_list_unlock(struct knlist *knl) +{ + bool do_free; + + if (knl == NULL) + return; + do_free = knl->kl_autodestroy && knlist_empty(knl); + knl->kl_unlock(knl->kl_lockarg); + if (do_free) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + #define KNL_ASSERT_LOCK(knl, islocked) do { \ if (islocked) \ KNL_ASSERT_LOCKED(knl); \ @@ -264,7 +293,10 @@ SYSCTL_INT(_kern, OID_AUTO, kq_calloutmax, CTLFLAG_RW, #define KNL_ASSERT_UNLOCKED(knl) do {} while (0) #endif /* INVARIANTS */ +#ifndef KN_HASHSIZE #define KN_HASHSIZE 64 /* XXX should be tunable */ +#endif + #define KN_HASH(val, mask) (((val) ^ (val >> 8)) & (mask)) static int @@ -291,28 +323,30 @@ MTX_SYSINIT(kqueue_filterops, &filterops_lock, "protect sysfilt_ops", MTX_DEF); static struct { struct filterops *for_fop; + int for_nolock; int for_refcnt; } sysfilt_ops[EVFILT_SYSCOUNT] = { - { &file_filtops }, /* EVFILT_READ */ - { &file_filtops }, /* EVFILT_WRITE */ + { &file_filtops, 1 }, /* EVFILT_READ */ + { &file_filtops, 1 }, /* EVFILT_WRITE */ { &null_filtops }, /* EVFILT_AIO */ - { &file_filtops }, /* EVFILT_VNODE */ + { &file_filtops, 1 }, /* EVFILT_VNODE */ #ifndef __rtems__ - { &proc_filtops }, /* EVFILT_PROC */ - { &sig_filtops }, /* EVFILT_SIGNAL */ + { &proc_filtops, 1 }, /* EVFILT_PROC */ + { &sig_filtops, 1 }, /* EVFILT_SIGNAL */ #else /* __rtems__ */ { &null_filtops }, /* EVFILT_PROC */ { &null_filtops }, /* EVFILT_SIGNAL */ #endif /* __rtems__ */ - { &timer_filtops }, /* EVFILT_TIMER */ + { &timer_filtops, 1 }, /* EVFILT_TIMER */ { &null_filtops }, /* former EVFILT_NETDEV */ #ifndef __rtems__ - { &fs_filtops }, /* EVFILT_FS */ + { &fs_filtops, 1 }, /* EVFILT_FS */ #else /* __rtems__ */ { &null_filtops }, /* EVFILT_FS */ #endif /* __rtems__ */ { &null_filtops }, /* EVFILT_LIO */ - { &user_filtops }, /* EVFILT_USER */ + { &user_filtops, 1 }, /* EVFILT_USER */ + { &null_filtops }, /* EVFILT_SENDFILE */ }; /* @@ -375,16 +409,16 @@ static int filt_procattach(struct knote *kn) { struct proc *p; - int immediate; int error; + bool exiting, immediate; - immediate = 0; + exiting = immediate = false; p = pfind(kn->kn_id); if (p == NULL && (kn->kn_sfflags & NOTE_EXIT)) { p = zpfind(kn->kn_id); - immediate = 1; + exiting = true; } else if (p != NULL && (p->p_flag & P_WEXIT)) { - immediate = 1; + exiting = true; } if (p == NULL) @@ -398,23 +432,33 @@ filt_procattach(struct knote *kn) kn->kn_flags |= EV_CLEAR; /* automatically set */ /* - * internal flag indicating registration done by kernel + * Internal flag indicating registration done by kernel for the + * purposes of getting a NOTE_CHILD notification. */ - if (kn->kn_flags & EV_FLAG1) { + if (kn->kn_flags & EV_FLAG2) { + kn->kn_flags &= ~EV_FLAG2; kn->kn_data = kn->kn_sdata; /* ppid */ kn->kn_fflags = NOTE_CHILD; + kn->kn_sfflags &= ~(NOTE_EXIT | NOTE_EXEC | NOTE_FORK); + immediate = true; /* Force immediate activation of child note. */ + } + /* + * Internal flag indicating registration done by kernel (for other than + * NOTE_CHILD). + */ + if (kn->kn_flags & EV_FLAG1) { kn->kn_flags &= ~EV_FLAG1; } - if (immediate == 0) - knlist_add(&p->p_klist, kn, 1); + knlist_add(p->p_klist, kn, 1); /* - * Immediately activate any exit notes if the target process is a - * zombie. This is necessary to handle the case where the target - * process, e.g. a child, dies before the kevent is registered. + * Immediately activate any child notes or, in the case of a zombie + * target process, exit notes. The latter is necessary to handle the + * case where the target process, e.g. a child, dies before the kevent + * is registered. */ - if (immediate && filt_proc(kn, NOTE_EXIT)) + if (immediate || (exiting && filt_proc(kn, NOTE_EXIT))) KNOTE_ACTIVATE(kn, 0); PROC_UNLOCK(p); @@ -434,10 +478,8 @@ filt_procattach(struct knote *kn) static void filt_procdetach(struct knote *kn) { - struct proc *p; - p = kn->kn_ptr.p_proc; - knlist_remove(&p->p_klist, kn, 0); + knlist_remove(kn->kn_knlist, kn, 0); kn->kn_ptr.p_proc = NULL; } @@ -445,30 +487,26 @@ filt_procdetach(struct knote *kn) static int filt_proc(struct knote *kn, long hint) { - struct proc *p = kn->kn_ptr.p_proc; + struct proc *p; u_int event; - /* - * mask off extra data - */ + p = kn->kn_ptr.p_proc; + if (p == NULL) /* already activated, from attach filter */ + return (0); + + /* Mask off extra data. */ event = (u_int)hint & NOTE_PCTRLMASK; - /* - * if the user is interested in this event, record it. - */ + /* If the user is interested in this event, record it. */ if (kn->kn_sfflags & event) kn->kn_fflags |= event; - /* - * process is gone, so flag the event as finished. - */ + /* Process is gone, so flag the event as finished. */ if (event == NOTE_EXIT) { - if (!(kn->kn_status & KN_DETACHED)) - knlist_remove_inevent(&p->p_klist, kn); - kn->kn_flags |= (EV_EOF | EV_ONESHOT); + kn->kn_flags |= EV_EOF | EV_ONESHOT; kn->kn_ptr.p_proc = NULL; if (kn->kn_fflags & NOTE_EXIT) - kn->kn_data = p->p_xstat; + kn->kn_data = KW_EXITCODE(p->p_xexit, p->p_xsig); if (kn->kn_fflags == 0) kn->kn_flags |= EV_DROP; return (1); @@ -498,8 +536,6 @@ knote_fork(struct knlist *list, int pid) list->kl_lock(list->kl_lockarg); SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) - continue; kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { @@ -521,7 +557,7 @@ knote_fork(struct knlist *list, int pid) /* * The NOTE_TRACK case. In addition to the activation - * of the event, we need to register new event to + * of the event, we need to register new events to * track the child. Drop the locks in preparation for * the call to kqueue_register(). */ @@ -530,8 +566,28 @@ knote_fork(struct knlist *list, int pid) list->kl_unlock(list->kl_lockarg); /* - * Activate existing knote and register a knote with + * Activate existing knote and register tracking knotes with * new process. + * + * First register a knote to get just the child notice. This + * must be a separate note from a potential NOTE_EXIT + * notification since both NOTE_CHILD and NOTE_EXIT are defined + * to use the data field (in conflicting ways). + */ + kev.ident = pid; + kev.filter = kn->kn_filter; + kev.flags = kn->kn_flags | EV_ADD | EV_ENABLE | EV_ONESHOT | + EV_FLAG2; + kev.fflags = kn->kn_sfflags; + kev.data = kn->kn_id; /* parent */ + kev.udata = kn->kn_kevent.udata;/* preserve udata */ + error = kqueue_register(kq, &kev, NULL, 0); + if (error) + kn->kn_fflags |= NOTE_TRACKERR; + + /* + * Then register another knote to track other potential events + * from the new process. */ kev.ident = pid; kev.filter = kn->kn_filter; @@ -557,64 +613,123 @@ knote_fork(struct knlist *list, int pid) * XXX: EVFILT_TIMER should perhaps live in kern_time.c beside the * interval timer support code. */ -static int -timertoticks(intptr_t data) -{ - struct timeval tv; - int tticks; - - tv.tv_sec = data / 1000; - tv.tv_usec = (data % 1000) * 1000; - tticks = tvtohz(&tv); - return tticks; +#define NOTE_TIMER_PRECMASK (NOTE_SECONDS|NOTE_MSECONDS|NOTE_USECONDS| \ + NOTE_NSECONDS) + +static sbintime_t +timer2sbintime(intptr_t data, int flags) +{ + + /* + * Macros for converting to the fractional second portion of an + * sbintime_t using 64bit multiplication to improve precision. + */ +#define NS_TO_SBT(ns) (((ns) * (((uint64_t)1 << 63) / 500000000)) >> 32) +#define US_TO_SBT(us) (((us) * (((uint64_t)1 << 63) / 500000)) >> 32) +#define MS_TO_SBT(ms) (((ms) * (((uint64_t)1 << 63) / 500)) >> 32) + switch (flags & NOTE_TIMER_PRECMASK) { + case NOTE_SECONDS: +#ifdef __LP64__ + if (data > (SBT_MAX / SBT_1S)) + return SBT_MAX; +#endif + return ((sbintime_t)data << 32); + case NOTE_MSECONDS: /* FALLTHROUGH */ + case 0: + if (data >= 1000) { + int64_t secs = data / 1000; +#ifdef __LP64__ + if (secs > (SBT_MAX / SBT_1S)) + return SBT_MAX; +#endif + return (secs << 32 | MS_TO_SBT(data % 1000)); + } + return MS_TO_SBT(data); + case NOTE_USECONDS: + if (data >= 1000000) { + int64_t secs = data / 1000000; +#ifdef __LP64__ + if (secs > (SBT_MAX / SBT_1S)) + return SBT_MAX; +#endif + return (secs << 32 | US_TO_SBT(data % 1000000)); + } + return US_TO_SBT(data); + case NOTE_NSECONDS: + if (data >= 1000000000) { + int64_t secs = data / 1000000000; +#ifdef __LP64__ + if (secs > (SBT_MAX / SBT_1S)) + return SBT_MAX; +#endif + return (secs << 32 | US_TO_SBT(data % 1000000000)); + } + return NS_TO_SBT(data); + default: + break; + } + return (-1); } static void filt_timerexpire(void *knx) { - struct knote *kn = knx; struct callout *calloutp; + struct knote *kn; + kn = knx; kn->kn_data++; KNOTE_ACTIVATE(kn, 0); /* XXX - handle locking */ - /* - * timertoticks() uses tvtohz() which always adds 1 to allow - * for the time until the next clock interrupt being strictly - * less than 1 clock tick. We don't want that here since we - * want to appear to be in sync with the clock interrupt even - * when we're delayed. - */ if ((kn->kn_flags & EV_ONESHOT) != EV_ONESHOT) { calloutp = (struct callout *)kn->kn_hook; - callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata) - 1, - filt_timerexpire, kn); + *kn->kn_ptr.p_nexttime += timer2sbintime(kn->kn_sdata, + kn->kn_sfflags); + callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, + filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); } } /* - * data contains amount of time to sleep, in milliseconds + * data contains amount of time to sleep */ static int filt_timerattach(struct knote *kn) { struct callout *calloutp; + sbintime_t to; + unsigned int ncallouts; - atomic_add_int(&kq_ncallouts, 1); + if ((intptr_t)kn->kn_sdata < 0) + return (EINVAL); + if ((intptr_t)kn->kn_sdata == 0 && (kn->kn_flags & EV_ONESHOT) == 0) + kn->kn_sdata = 1; + /* Only precision unit are supported in flags so far */ + if (kn->kn_sfflags & ~NOTE_TIMER_PRECMASK) + return (EINVAL); - if (kq_ncallouts >= kq_calloutmax) { - atomic_add_int(&kq_ncallouts, -1); - return (ENOMEM); - } + to = timer2sbintime(kn->kn_sdata, kn->kn_sfflags); + if (to < 0) + return (EINVAL); + + ncallouts = atomic_load_explicit(&kq_ncallouts, memory_order_relaxed); + do { + if (ncallouts >= kq_calloutmax) + return (ENOMEM); + } while (!atomic_compare_exchange_weak_explicit(&kq_ncallouts, + &ncallouts, ncallouts + 1, memory_order_relaxed, + memory_order_relaxed)); kn->kn_flags |= EV_CLEAR; /* automatically set */ - kn->kn_status &= ~KN_DETACHED; /* knlist_add usually sets it */ + kn->kn_status &= ~KN_DETACHED; /* knlist_add clears it */ + kn->kn_ptr.p_nexttime = malloc(sizeof(sbintime_t), M_KQUEUE, M_WAITOK); calloutp = malloc(sizeof(*calloutp), M_KQUEUE, M_WAITOK); - callout_init(calloutp, CALLOUT_MPSAFE); + callout_init(calloutp, 1); kn->kn_hook = calloutp; - callout_reset_curcpu(calloutp, timertoticks(kn->kn_sdata), - filt_timerexpire, kn); + *kn->kn_ptr.p_nexttime = to + sbinuptime(); + callout_reset_sbt_on(calloutp, *kn->kn_ptr.p_nexttime, 0, + filt_timerexpire, kn, PCPU_GET(cpuid), C_ABSOLUTE); return (0); } @@ -623,12 +738,15 @@ static void filt_timerdetach(struct knote *kn) { struct callout *calloutp; + unsigned int old; calloutp = (struct callout *)kn->kn_hook; callout_drain(calloutp); free(calloutp, M_KQUEUE); - atomic_add_int(&kq_ncallouts, -1); - kn->kn_status |= KN_DETACHED; /* knlist_remove usually clears it */ + free(kn->kn_ptr.p_nexttime, M_KQUEUE); + old = atomic_fetch_sub_explicit(&kq_ncallouts, 1, memory_order_relaxed); + KASSERT(old > 0, ("Number of callouts cannot become negative")); + kn->kn_status |= KN_DETACHED; /* knlist_remove sets it */ } static int @@ -727,34 +845,60 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) } #ifdef __rtems__ +static int +kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps); + static #endif /* __rtems__ */ int sys_kqueue(struct thread *td, struct kqueue_args *uap) { + + return (kern_kqueue(td, 0, NULL)); +} + +static void +kqueue_init(struct kqueue *kq) +{ + + mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF | MTX_DUPOK); + TAILQ_INIT(&kq->kq_head); + knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); + TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); +} + +int +kern_kqueue(struct thread *td, int flags, struct filecaps *fcaps) +{ struct filedesc *fdp; struct kqueue *kq; struct file *fp; + struct ucred *cred; int fd, error; #ifndef __rtems__ fdp = td->td_proc->p_fd; + cred = td->td_ucred; + if (!chgkqcnt(cred->cr_ruidinfo, 1, lim_cur(td, RLIMIT_KQUEUES))) + return (ENOMEM); #else /* __rtems__ */ - (void) fdp; + (void)fdp; + (void)cred; #endif /* __rtems__ */ - error = falloc(td, &fp, &fd, 0); - if (error) - goto done2; - /* An extra reference on `nfp' has been held for us by falloc(). */ + error = falloc_caps(td, &fp, &fd, flags, fcaps); + if (error != 0) { + chgkqcnt(cred->cr_ruidinfo, -1, 0); + return (error); + } + + /* An extra reference on `fp' has been held for us by falloc(). */ kq = malloc(sizeof *kq, M_KQUEUE, M_WAITOK | M_ZERO); - mtx_init(&kq->kq_lock, "kqueue", NULL, MTX_DEF|MTX_DUPOK); - TAILQ_INIT(&kq->kq_head); + kqueue_init(kq); #ifndef __rtems__ kq->kq_fdp = fdp; + kq->kq_cred = crhold(cred); #endif /* __rtems__ */ - knlist_init_mtx(&kq->kq_sel.si_note, &kq->kq_lock); - TASK_INIT(&kq->kq_task, 0, kqueue_task, kq); #ifndef __rtems__ FILEDESC_XLOCK(fdp); @@ -770,8 +914,7 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap) fdrop(fp, td); td->td_retval[0] = fd; -done2: - return (error); + return (0); } #ifdef __rtems__ int @@ -806,9 +949,11 @@ struct kevent_args { }; #endif #ifdef __rtems__ -static int -kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct - kevent_copyops *k_ops, const struct timespec *timeout); +static int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, + struct kevent_copyops *k_ops, const struct timespec *timeout); + +static int kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, + int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); static #endif /* __rtems__ */ @@ -936,24 +1081,39 @@ int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout) { - struct kevent keva[KQ_NEVENTS]; - struct kevent *kevp, *changes; - struct kqueue *kq; + cap_rights_t rights; struct file *fp; - int i, n, nerrors, error; + int error; - if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0) + cap_rights_init(&rights); + if (nchanges > 0) + cap_rights_set(&rights, CAP_KQUEUE_CHANGE); + if (nevents > 0) + cap_rights_set(&rights, CAP_KQUEUE_EVENT); + error = fget(td, fd, &rights, &fp); + if (error != 0) return (error); - if ((error = kqueue_acquire(fp, &kq)) != 0) - goto done_norel; - nerrors = 0; + error = kern_kevent_fp(td, fp, nchanges, nevents, k_ops, timeout); + fdrop(fp, td); + return (error); +} + +static int +kqueue_kevent(struct kqueue *kq, struct thread *td, int nchanges, int nevents, + struct kevent_copyops *k_ops, const struct timespec *timeout) +{ + struct kevent keva[KQ_NEVENTS]; + struct kevent *kevp, *changes; + int i, n, nerrors, error; + + nerrors = 0; while (nchanges > 0) { n = nchanges > KQ_NEVENTS ? KQ_NEVENTS : nchanges; error = k_ops->k_copyin(k_ops->arg, keva, n); if (error) - goto done; + return (error); changes = keva; for (i = 0; i < n; i++) { kevp = &changes[i]; @@ -962,31 +1122,56 @@ kern_kevent(struct thread *td, int fd, int nchanges, int nevents, kevp->flags &= ~EV_SYSFLAGS; error = kqueue_register(kq, kevp, td, 1); if (error || (kevp->flags & EV_RECEIPT)) { - if (nevents != 0) { - kevp->flags = EV_ERROR; - kevp->data = error; - (void) k_ops->k_copyout(k_ops->arg, - kevp, 1); - nevents--; - nerrors++; - } else { - goto done; - } + if (nevents == 0) + return (error); + kevp->flags = EV_ERROR; + kevp->data = error; + (void)k_ops->k_copyout(k_ops->arg, kevp, 1); + nevents--; + nerrors++; } } nchanges -= n; } if (nerrors) { td->td_retval[0] = nerrors; - error = 0; - goto done; + return (0); } - error = kqueue_scan(kq, nevents, k_ops, timeout, keva, td); -done: + return (kqueue_scan(kq, nevents, k_ops, timeout, keva, td)); +} + +int +kern_kevent_fp(struct thread *td, struct file *fp, int nchanges, int nevents, + struct kevent_copyops *k_ops, const struct timespec *timeout) +{ + struct kqueue *kq; + int error; + + error = kqueue_acquire(fp, &kq); + if (error != 0) + return (error); + error = kqueue_kevent(kq, td, nchanges, nevents, k_ops, timeout); kqueue_release(kq, 0); -done_norel: - fdrop(fp, td); + return (error); +} + +/* + * Performs a kevent() call on a temporarily created kqueue. This can be + * used to perform one-shot polling, similar to poll() and select(). + */ +int +kern_kevent_anonymous(struct thread *td, int nevents, + struct kevent_copyops *k_ops) +{ + struct kqueue kq = {}; + int error; + + kqueue_init(&kq); + kq.kq_refcnt = 1; + error = kqueue_kevent(&kq, td, nevents, nevents, k_ops, NULL); + kqueue_drain(&kq, td); + kqueue_destroy(&kq); return (error); } @@ -1046,6 +1231,9 @@ kqueue_fo_find(int filt) if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return NULL; + if (sysfilt_ops[~filt].for_nolock) + return sysfilt_ops[~filt].for_fop; + mtx_lock(&filterops_lock); sysfilt_ops[~filt].for_refcnt++; if (sysfilt_ops[~filt].for_fop == NULL) @@ -1062,6 +1250,9 @@ kqueue_fo_release(int filt) if (filt > 0 || filt + EVFILT_SYSCOUNT < 0) return; + if (sysfilt_ops[~filt].for_nolock) + return; + mtx_lock(&filterops_lock); KASSERT(sysfilt_ops[~filt].for_refcnt > 0, ("filter object refcount not valid on release")); @@ -1080,11 +1271,17 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa struct filterops *fops; struct file *fp; struct knote *kn, *tkn; + struct knlist *knl; + cap_rights_t rights; int error, filt, event; int haskqglobal, filedesc_unlock; + if ((kev->flags & (EV_ENABLE | EV_DISABLE)) == (EV_ENABLE | EV_DISABLE)) + return (EINVAL); + fp = NULL; kn = NULL; + knl = NULL; error = 0; haskqglobal = 0; filedesc_unlock = 0; @@ -1094,12 +1291,25 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa if (fops == NULL) return EINVAL; - tkn = knote_alloc(waitok); /* prevent waiting with locks */ + if (kev->flags & EV_ADD) { + /* + * Prevent waiting with locks. Non-sleepable + * allocation failures are handled in the loop, only + * if the spare knote appears to be actually required. + */ + tkn = knote_alloc(waitok); + } else { + tkn = NULL; + } findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); - error = fget(td, kev->ident, CAP_POLL_EVENT, &fp); + if (kev->ident > INT_MAX) + error = EBADF; + else + error = fget(td, kev->ident, + cap_rights_init(&rights, CAP_EVENT), &fp); if (error) goto done; @@ -1120,7 +1330,7 @@ findkn: if (fp->f_io.pathinfo.handlers == &kqueueops) { #endif /* __rtems__ */ /* - * if we add some inteligence about what we are doing, + * If we add some intelligence about what we are doing, * we should be able to support events on ourselves. * We need to know when we are doing this to prevent * getting both the knlist lock and the kq lock since @@ -1152,7 +1362,18 @@ findkn: kqueue_expand(kq, fops, kev->ident, waitok); KQ_LOCK(kq); - if (kq->kq_knhashmask != 0) { + + /* + * If possible, find an existing knote to use for this kevent. + */ + if (kev->filter == EVFILT_PROC && + (kev->flags & (EV_FLAG1 | EV_FLAG2)) != 0) { + /* This is an internal creation of a process tracking + * note. Don't attempt to coalesce this with an + * existing note. + */ + ; + } else if (kq->kq_knhashmask != 0) { struct klist *list; list = &kq->kq_knhash[ @@ -1164,7 +1385,7 @@ findkn: } } - /* knote is in the process of changing, wait for it to stablize. */ + /* knote is in the process of changing, wait for it to stabilize. */ if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); if (filedesc_unlock) { @@ -1208,7 +1429,7 @@ findkn: kev->data = 0; kn->kn_kevent = *kev; kn->kn_kevent.flags &= ~(EV_ADD | EV_DELETE | - EV_ENABLE | EV_DISABLE); + EV_ENABLE | EV_DISABLE | EV_FORCEONESHOT); kn->kn_status = KN_INFLUX|KN_DETACHED; error = knote_attach(kn, kq); @@ -1222,7 +1443,7 @@ findkn: knote_drop(kn, td); goto done; } - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); goto done_ev_add; } else { /* No matching knote and the EV_ADD flag is not set. */ @@ -1241,6 +1462,11 @@ findkn: goto done; } + if (kev->flags & EV_FORCEONESHOT) { + kn->kn_flags |= EV_ONESHOT; + KNOTE_ACTIVATE(kn, 1); + } + /* * The user may change some filter values after the initial EV_ADD, * but doing so will not reset any filter which has already been @@ -1248,7 +1474,7 @@ findkn: */ kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); kn->kn_kevent.udata = kev->udata; if (!fops->f_isfd && fops->f_touch != NULL) { fops->f_touch(kn, kev, EVENT_REGISTER); @@ -1265,24 +1491,24 @@ findkn: * kn_knlist. */ done_ev_add: - event = kn->kn_fop->f_event(kn, 0); + if ((kev->flags & EV_ENABLE) != 0) + kn->kn_status &= ~KN_DISABLED; + else if ((kev->flags & EV_DISABLE) != 0) + kn->kn_status |= KN_DISABLED; + + if ((kn->kn_status & KN_DISABLED) == 0) + event = kn->kn_fop->f_event(kn, 0); + else + event = 0; + KQ_LOCK(kq); if (event) - KNOTE_ACTIVATE(kn, 1); + kn->kn_status |= KN_ACTIVE; + if ((kn->kn_status & (KN_ACTIVE | KN_DISABLED | KN_QUEUED)) == + KN_ACTIVE) + knote_enqueue(kn); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); - - if ((kev->flags & EV_DISABLE) && - ((kn->kn_status & KN_DISABLED) == 0)) { - kn->kn_status |= KN_DISABLED; - } - - if ((kev->flags & EV_ENABLE) && (kn->kn_status & KN_DISABLED)) { - kn->kn_status &= ~KN_DISABLED; - if ((kn->kn_status & KN_ACTIVE) && - ((kn->kn_status & KN_QUEUED) == 0)) - knote_enqueue(kn); - } + kn_list_unlock(knl); KQ_UNLOCK_FLUX(kq); done: @@ -1291,8 +1517,7 @@ done: FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); - if (tkn != NULL) - knote_free(tkn); + knote_free(tkn); if (fops != NULL) kqueue_fo_release(filt); return (error); @@ -1348,7 +1573,7 @@ kqueue_schedtask(struct kqueue *kq) ("scheduling kqueue task while draining")); if ((kq->kq_state & KQ_TASKSCHED) != KQ_TASKSCHED) { - taskqueue_enqueue(taskqueue_kqueue, &kq->kq_task); + taskqueue_enqueue(taskqueue_kqueue_ctx, &kq->kq_task); kq->kq_state |= KQ_TASKSCHED; } } @@ -1456,10 +1681,10 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, const struct timespec *tsp, struct kevent *keva, struct thread *td) { struct kevent *kevp; - struct timeval atv, rtv, ttv; struct knote *kn, *marker; - int count, timeout, nkev, error, influx; - int haskqglobal, touch; + struct knlist *knl; + sbintime_t asbt, rsbt; + int count, error, haskqglobal, influx, nkev, touch; count = maxevents; nkev = 0; @@ -1469,53 +1694,42 @@ kqueue_scan(struct kqueue *kq, int maxevents, struct kevent_copyops *k_ops, if (maxevents == 0) goto done_nl; + rsbt = 0; if (tsp != NULL) { - TIMESPEC_TO_TIMEVAL(&atv, tsp); - if (itimerfix(&atv)) { + if (tsp->tv_sec < 0 || tsp->tv_nsec < 0 || + tsp->tv_nsec >= 1000000000) { error = EINVAL; goto done_nl; } - if (tsp->tv_sec == 0 && tsp->tv_nsec == 0) - timeout = -1; - else - timeout = atv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&atv); - getmicrouptime(&rtv); - timevaladd(&atv, &rtv); - } else { - atv.tv_sec = 0; - atv.tv_usec = 0; - timeout = 0; - } + if (timespecisset(tsp)) { + if (tsp->tv_sec <= INT32_MAX) { + rsbt = tstosbt(*tsp); + if (TIMESEL(&asbt, rsbt)) + asbt += tc_tick_sbt; + if (asbt <= SBT_MAX - rsbt) + asbt += rsbt; + else + asbt = 0; + rsbt >>= tc_precexp; + } else + asbt = 0; + } else + asbt = -1; + } else + asbt = 0; marker = knote_alloc(1); - if (marker == NULL) { - error = ENOMEM; - goto done_nl; - } marker->kn_status = KN_MARKER; KQ_LOCK(kq); - goto start; retry: - if (atv.tv_sec || atv.tv_usec) { - getmicrouptime(&rtv); - if (timevalcmp(&rtv, &atv, >=)) - goto done; - ttv = atv; - timevalsub(&ttv, &rtv); - timeout = ttv.tv_sec > 24 * 60 * 60 ? - 24 * 60 * 60 * hz : tvtohz(&ttv); - } - -start: kevp = keva; if (kq->kq_count == 0) { - if (timeout < 0) { + if (asbt == -1) { error = EWOULDBLOCK; } else { kq->kq_state |= KQ_SLEEP; - error = msleep(kq, &kq->kq_lock, PSOCK | PCATCH, - "kqread", timeout); + error = msleep_sbt(kq, &kq->kq_lock, PSOCK | PCATCH, + "kqread", asbt, rsbt, C_ABSOLUTE); } if (error == 0) goto retry; @@ -1594,7 +1808,7 @@ start: KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); - KN_LIST_LOCK(kn); + knl = kn_list_lock(kn); if (kn->kn_fop->f_event(kn, 0) == 0) { KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); @@ -1602,7 +1816,7 @@ start: ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | KN_SCAN); kq->kq_count--; - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; continue; } @@ -1614,7 +1828,7 @@ start: *kevp = kn->kn_kevent; KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); - if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { + if (kn->kn_flags & (EV_CLEAR | EV_DISPATCH)) { /* * Manually clear knotes who weren't * 'touch'ed. @@ -1631,7 +1845,7 @@ start: TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); kn->kn_status &= ~(KN_INFLUX | KN_SCAN); - KN_LIST_UNLOCK(kn); + kn_list_unlock(knl); influx = 1; } @@ -1665,35 +1879,6 @@ done_nl: } #ifndef __rtems__ -/* - * XXX - * This could be expanded to call kqueue_scan, if desired. - */ -/*ARGSUSED*/ -static int -kqueue_read(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - return (ENXIO); -} - -/*ARGSUSED*/ -static int -kqueue_write(struct file *fp, struct uio *uio, struct ucred *active_cred, - int flags, struct thread *td) -{ - return (ENXIO); -} - -/*ARGSUSED*/ -static int -kqueue_truncate(struct file *fp, off_t length, struct ucred *active_cred, - struct thread *td) -{ - - return (EINVAL); -} - /*ARGSUSED*/ static int kqueue_ioctl(struct file *fp, u_long cmd, void *data, @@ -1813,26 +1998,12 @@ rtems_bsd_kqueue_stat(const rtems_filesystem_location_info_t *loc, return (0); } -/*ARGSUSED*/ -static int -kqueue_close(struct file *fp, struct thread *td) +static void +kqueue_drain(struct kqueue *kq, struct thread *td) { - struct kqueue *kq = fp->f_data; - struct filedesc *fdp; struct knote *kn; int i; - int error; - int filedesc_unlock; - -#ifdef __rtems__ - /* FIXME: Move this to the RTEMS close() function */ - knote_fdclose(td, rtems_bsd_fp_to_fd(fp)); -#endif /* __rtems__ */ - if ((error = kqueue_acquire(fp, &kq))) - return error; - - filedesc_unlock = 0; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, @@ -1842,11 +2013,6 @@ kqueue_close(struct file *fp, struct thread *td) msleep(&kq->kq_refcnt, &kq->kq_lock, PSOCK, "kqclose", 0); KASSERT(kq->kq_refcnt == 1, ("other refs are out there!")); -#ifndef __rtems__ - fdp = kq->kq_fdp; -#else /* __rtems__ */ - (void) fdp; -#endif /* __rtems__ */ KASSERT(knlist_empty(&kq->kq_sel.si_note), ("kqueue's knlist not empty")); @@ -1897,6 +2063,38 @@ kqueue_close(struct file *fp, struct thread *td) } KQ_UNLOCK(kq); +} + +static void +kqueue_destroy(struct kqueue *kq) +{ + + KASSERT(kq->kq_fdp == NULL, + ("kqueue still attached to a file descriptor")); + seldrain(&kq->kq_sel); + knlist_destroy(&kq->kq_sel.si_note); + mtx_destroy(&kq->kq_lock); + + if (kq->kq_knhash != NULL) + free(kq->kq_knhash, M_KQUEUE); + if (kq->kq_knlist != NULL) + free(kq->kq_knlist, M_KQUEUE); + + funsetown(&kq->kq_sigio); +} + +/*ARGSUSED*/ +static int +kqueue_close(struct file *fp, struct thread *td) +{ + struct kqueue *kq = fp->f_data; + struct filedesc *fdp; + int error; + int filedesc_unlock; + + if ((error = kqueue_acquire(fp, &kq))) + return error; + kqueue_drain(kq, td); #ifndef __rtems__ /* @@ -1905,6 +2103,8 @@ kqueue_close(struct file *fp, struct thread *td) * lock is owned, and filedesc sx is locked before, to not * take the sleepable lock after non-sleepable. */ + fdp = kq->kq_fdp; + kq->kq_fdp = NULL; if (!sx_xlocked(FILEDESC_LOCK(fdp))) { FILEDESC_XLOCK(fdp); filedesc_unlock = 1; @@ -1920,19 +2120,9 @@ kqueue_close(struct file *fp, struct thread *td) rtems_libio_unlock(); #endif /* __rtems__ */ - seldrain(&kq->kq_sel); - knlist_destroy(&kq->kq_sel.si_note); - mtx_destroy(&kq->kq_lock); -#ifndef __rtems__ - kq->kq_fdp = NULL; -#endif /* __rtems__ */ - - if (kq->kq_knhash != NULL) - free(kq->kq_knhash, M_KQUEUE); - if (kq->kq_knlist != NULL) - free(kq->kq_knlist, M_KQUEUE); - - funsetown(&kq->kq_sigio); + kqueue_destroy(kq); + chgkqcnt(kq->kq_cred->cr_ruidinfo, -1, 0); + crfree(kq->kq_cred); free(kq, M_KQUEUE); fp->f_data = NULL; @@ -1956,6 +2146,16 @@ rtems_bsd_kqueue_close(rtems_libio_t *iop) } #endif /* __rtems__ */ +#ifndef __rtems__ +static int +kqueue_fill_kinfo(struct file *fp, struct kinfo_file *kif, struct filedesc *fdp) +{ + + kif->kf_type = KF_TYPE_KQUEUE; + return (0); +} +#endif /* __rtems__ */ + static void kqueue_wakeup(struct kqueue *kq) { @@ -1993,7 +2193,7 @@ void knote(struct knlist *list, long hint, int lockflags) { struct kqueue *kq; - struct knote *kn; + struct knote *kn, *tkn; int error; if (list == NULL) @@ -2005,14 +2205,13 @@ knote(struct knlist *list, long hint, int lockflags) list->kl_lock(list->kl_lockarg); /* - * If we unlock the list lock (and set KN_INFLUX), we can eliminate - * the kqueue scheduling, but this will introduce four - * lock/unlock's for each knote to test. If we do, continue to use - * SLIST_FOREACH, SLIST_FOREACH_SAFE is not safe in our case, it is - * only safe if you want to remove the current item, which we are - * not doing. + * If we unlock the list lock (and set KN_INFLUX), we can + * eliminate the kqueue scheduling, but this will introduce + * four lock/unlock's for each knote to test. Also, marker + * would be needed to keep iteration position, since filters + * or other threads could remove events. */ - SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { + SLIST_FOREACH_SAFE(kn, &list->kl_list, kn_selnext, tkn) { kq = kn->kn_kq; KQ_LOCK(kq); if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { @@ -2068,7 +2267,8 @@ knlist_add(struct knlist *knl, struct knote *kn, int islocked) } static void -knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqislocked) +knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, + int kqislocked) { KASSERT(!(!!kqislocked && !knlislocked), ("kq locked w/o knl locked")); KNL_ASSERT_LOCK(knl, knlislocked); @@ -2081,7 +2281,7 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqis SLIST_REMOVE(&knl->kl_list, kn, knote, kn_selnext); kn->kn_knlist = NULL; if (!knlislocked) - knl->kl_unlock(knl->kl_lockarg); + kn_list_unlock(knl); if (!kqislocked) KQ_LOCK(kn->kn_kq); kn->kn_status |= KN_DETACHED; @@ -2090,7 +2290,7 @@ knlist_remove_kq(struct knlist *knl, struct knote *kn, int knlislocked, int kqis } /* - * remove all knotes from a specified klist + * remove knote from the specified knlist */ void knlist_remove(struct knlist *knl, struct knote *kn, int islocked) @@ -2099,20 +2299,10 @@ knlist_remove(struct knlist *knl, struct knote *kn, int islocked) knlist_remove_kq(knl, kn, islocked, 0); } -/* - * remove knote from a specified klist while in f_event handler. - */ -void -knlist_remove_inevent(struct knlist *knl, struct knote *kn) -{ - - knlist_remove_kq(knl, kn, 1, - (kn->kn_status & KN_HASKQLOCK) == KN_HASKQLOCK); -} - int knlist_empty(struct knlist *knl) { + KNL_ASSERT_LOCKED(knl); return SLIST_EMPTY(&knl->kl_list); } @@ -2126,27 +2316,61 @@ static void knlist_mtx_unlock(void *arg); static void knlist_mtx_lock(void *arg) { + mtx_lock((struct mtx *)arg); } static void knlist_mtx_unlock(void *arg) { + mtx_unlock((struct mtx *)arg); } static void knlist_mtx_assert_locked(void *arg) { + mtx_assert((struct mtx *)arg, MA_OWNED); } static void knlist_mtx_assert_unlocked(void *arg) { + mtx_assert((struct mtx *)arg, MA_NOTOWNED); } +#ifndef __rtems__ +static void +knlist_rw_rlock(void *arg) +{ + + rw_rlock((struct rwlock *)arg); +} + +static void +knlist_rw_runlock(void *arg) +{ + + rw_runlock((struct rwlock *)arg); +} + +static void +knlist_rw_assert_locked(void *arg) +{ + + rw_assert((struct rwlock *)arg, RA_LOCKED); +} + +static void +knlist_rw_assert_unlocked(void *arg) +{ + + rw_assert((struct rwlock *)arg, RA_UNLOCKED); +} +#endif /* __rtems__ */ + void knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), void (*kl_unlock)(void *), @@ -2175,6 +2399,7 @@ knlist_init(struct knlist *knl, void *lock, void (*kl_lock)(void *), else knl->kl_assert_unlocked = kl_assert_unlocked; + knl->kl_autodestroy = 0; SLIST_INIT(&knl->kl_list); } @@ -2185,6 +2410,26 @@ knlist_init_mtx(struct knlist *knl, struct mtx *lock) knlist_init(knl, lock, NULL, NULL, NULL, NULL); } +struct knlist * +knlist_alloc(struct mtx *lock) +{ + struct knlist *knl; + + knl = malloc(sizeof(struct knlist), M_KQUEUE, M_WAITOK); + knlist_init_mtx(knl, lock); + return (knl); +} + +#ifndef __rtems__ +void +knlist_init_rw_reader(struct knlist *knl, struct rwlock *lock) +{ + + knlist_init(knl, lock, knlist_rw_rlock, knlist_rw_runlock, + knlist_rw_assert_locked, knlist_rw_assert_unlocked); +} +#endif /* __rtems__ */ + void knlist_destroy(struct knlist *knl) { @@ -2192,7 +2437,7 @@ knlist_destroy(struct knlist *knl) #ifdef INVARIANTS /* * if we run across this error, we need to find the offending - * driver and have it call knlist_clear. + * driver and have it call knlist_clear or knlist_delete. */ if (!SLIST_EMPTY(&knl->kl_list)) printf("WARNING: destroying knlist w/ knotes on it!\n"); @@ -2202,6 +2447,18 @@ knlist_destroy(struct knlist *knl) SLIST_INIT(&knl->kl_list); } +void +knlist_detach(struct knlist *knl) +{ + + KNL_ASSERT_LOCKED(knl); + knl->kl_autodestroy = 1; + if (knlist_empty(knl)) { + knlist_destroy(knl); + free(knl, M_KQUEUE); + } +} + /* * Even if we are locked, we may need to drop the lock to allow any influx * knotes time to "settle". @@ -2212,6 +2469,7 @@ knlist_cleardel(struct knlist *knl, struct thread *td, int islocked, int killkn) struct knote *kn, *kn2; struct kqueue *kq; + KASSERT(!knl->kl_autodestroy, ("cleardel for autodestroy %p", knl)); if (islocked) KNL_ASSERT_LOCKED(knl); else { @@ -2421,15 +2679,16 @@ SYSINIT(knote, SI_SUB_PSEUDO, SI_ORDER_ANY, knote_init, NULL); static struct knote * knote_alloc(int waitok) { - return ((struct knote *)uma_zalloc(knote_zone, - (waitok ? M_WAITOK : M_NOWAIT)|M_ZERO)); + + return (uma_zalloc(knote_zone, (waitok ? M_WAITOK : M_NOWAIT) | + M_ZERO)); } static void knote_free(struct knote *kn) { - if (kn != NULL) - uma_zfree(knote_zone, kn); + + uma_zfree(knote_zone, kn); } /* @@ -2440,9 +2699,11 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) { struct kqueue *kq; struct file *fp; + cap_rights_t rights; int error; - if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0) + error = fget(td, fd, cap_rights_init(&rights, CAP_KQUEUE_CHANGE), &fp); + if (error != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; |