diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-04-04 09:36:57 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2017-04-04 14:46:23 +0200 |
commit | de8a76da2f374792594ce03a203b3f30e4889f6f (patch) | |
tree | 12b5e1e59358005c3c522955c08aee4795e4829c /freebsd/sys/kern | |
parent | Enable bridging by default (diff) | |
download | rtems-libbsd-de8a76da2f374792594ce03a203b3f30e4889f6f.tar.bz2 |
Update to FreeBSD head 2017-04-04
Git mirror commit 642b174daddbd0efd9bb5f242c43f4ab4db6869f.
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r-- | freebsd/sys/kern/init_main.c | 2 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_condvar.c | 10 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_event.c | 29 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_linker.c | 3 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_mib.c | 5 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_synch.c | 39 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_sysctl.c | 3 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_time.c | 145 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_timeout.c | 8 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_bus.c | 6 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_lock.c | 64 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_prf.c | 54 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_sleepqueue.c | 199 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_taskqueue.c | 17 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_uio.c | 9 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_unit.c | 17 | ||||
-rw-r--r-- | freebsd/sys/kern/sys_generic.c | 73 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_mbuf.c | 4 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_mbuf2.c | 2 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_sockbuf.c | 5 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 77 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_syscalls.c | 46 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_usrreq.c | 28 |
23 files changed, 591 insertions, 254 deletions
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 72bab872..910c1820 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -102,7 +102,7 @@ void mi_startup(void); /* Should be elsewhere */ static struct session session0; static struct pgrp pgrp0; struct proc proc0; -struct thread0_storage thread0_st __aligned(16); +struct thread0_storage thread0_st __aligned(32); struct vmspace vmspace0; struct proc *initproc; diff --git a/freebsd/sys/kern/kern_condvar.c b/freebsd/sys/kern/kern_condvar.c index 6358c376..2843e273 100644 --- a/freebsd/sys/kern/kern_condvar.c +++ b/freebsd/sys/kern/kern_condvar.c @@ -124,7 +124,7 @@ _cv_wait(struct cv *cvp, struct lock_object *lock) "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return; sleepq_lock(cvp); @@ -178,7 +178,7 @@ _cv_wait_unlock(struct cv *cvp, struct lock_object *lock) ("cv_wait_unlock cannot be used with Giant")); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) { + if (SCHEDULER_STOPPED_TD(td)) { class->lc_unlock(lock); return; } @@ -230,7 +230,7 @@ _cv_wait_sig(struct cv *cvp, struct lock_object *lock) "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); @@ -295,7 +295,7 @@ _cv_timedwait_sbt(struct cv *cvp, struct lock_object *lock, sbintime_t sbt, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); @@ -358,7 +358,7 @@ _cv_timedwait_sig_sbt(struct cv *cvp, struct lock_object *lock, "Waiting on \"%s\"", cvp->cv_description); class = LOCK_CLASS(lock); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(cvp); diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 473414ae..a01e9b4b 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -372,6 +372,7 @@ static struct { { &null_filtops }, /* EVFILT_LIO */ { &user_filtops, 1 }, /* EVFILT_USER */ { &null_filtops }, /* EVFILT_SENDFILE */ + { &file_filtops, 1 }, /* EVFILT_EMPTY */ }; /* @@ -965,6 +966,17 @@ kqueue(void) } #endif /* __rtems__ */ +#ifdef KTRACE +static size_t +kev_iovlen(int n, u_int kgio) +{ + + if (n < 0 || n >= kgio / sizeof(struct kevent)) + return (kgio); + return (n * sizeof(struct kevent)); +} +#endif + #ifndef _SYS_SYSPROTO_H_ struct kevent_args { int fd; @@ -988,15 +1000,18 @@ int sys_kevent(struct thread *td, struct kevent_args *uap) { struct timespec ts, *tsp; - struct kevent_copyops k_ops = { uap, - kevent_copyout, - kevent_copyin}; + struct kevent_copyops k_ops = { + .arg = uap, + .k_copyout = kevent_copyout, + .k_copyin = kevent_copyin, + }; int error; #ifdef KTRACE struct uio ktruio; struct iovec ktriov; struct uio *ktruioin = NULL; struct uio *ktruioout = NULL; + u_int kgio; #endif if (uap->timeout != NULL) { @@ -1009,13 +1024,15 @@ sys_kevent(struct thread *td, struct kevent_args *uap) #ifdef KTRACE if (KTRPOINT(td, KTR_GENIO)) { + kgio = ktr_geniosize; ktriov.iov_base = uap->changelist; - ktriov.iov_len = uap->nchanges * sizeof(struct kevent); + ktriov.iov_len = kev_iovlen(uap->nchanges, kgio); ktruio = (struct uio){ .uio_iov = &ktriov, .uio_iovcnt = 1, .uio_segflg = UIO_USERSPACE, .uio_rw = UIO_READ, .uio_td = td }; ktruioin = cloneuio(&ktruio); ktriov.iov_base = uap->eventlist; + ktriov.iov_len = kev_iovlen(uap->nevents, kgio); ktriov.iov_len = uap->nevents * sizeof(struct kevent); ktruioout = cloneuio(&ktruio); } @@ -1026,9 +1043,9 @@ sys_kevent(struct thread *td, struct kevent_args *uap) #ifdef KTRACE if (ktruioin != NULL) { - ktruioin->uio_resid = uap->nchanges * sizeof(struct kevent); + ktruioin->uio_resid = kev_iovlen(uap->nchanges, kgio); ktrgenio(uap->fd, UIO_WRITE, ktruioin, 0); - ktruioout->uio_resid = td->td_retval[0] * sizeof(struct kevent); + ktruioout->uio_resid = kev_iovlen(td->td_retval[0], kgio); ktrgenio(uap->fd, UIO_READ, ktruioout, error); } #endif diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index 82a33023..5a69490d 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -475,7 +475,8 @@ linker_load_file(const char *filename, linker_file_t *result) * printout a message before to fail. */ if (error == ENOSYS) - printf("linker_load_file: Unsupported file type\n"); + printf("%s: %s - unsupported file type\n", + __func__, filename); /* * Format not recognized or otherwise unloadable. diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c index 63d8b44f..b3ab9e9f 100644 --- a/freebsd/sys/kern/kern_mib.c +++ b/freebsd/sys/kern/kern_mib.c @@ -97,6 +97,11 @@ SYSCTL_ROOT_NODE(OID_AUTO, regression, CTLFLAG_RW, 0, "Regression test MIB"); #endif +#ifdef EXT_RESOURCES +SYSCTL_ROOT_NODE(OID_AUTO, clock, CTLFLAG_RW, 0, + "Clocks"); +#endif + SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE, kern_ident, 0, "Kernel identifier"); diff --git a/freebsd/sys/kern/kern_synch.c b/freebsd/sys/kern/kern_synch.c index 7109e798..91b3c72b 100644 --- a/freebsd/sys/kern/kern_synch.c +++ b/freebsd/sys/kern/kern_synch.c @@ -68,13 +68,6 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> -#define KTDSTATE(td) \ - (((td)->td_inhibitors & TDI_SLEEPING) != 0 ? "sleep" : \ - ((td)->td_inhibitors & TDI_SUSPENDED) != 0 ? "suspended" : \ - ((td)->td_inhibitors & TDI_SWAPPED) != 0 ? "swapped" : \ - ((td)->td_inhibitors & TDI_LOCK) != 0 ? "blocked" : \ - ((td)->td_inhibitors & TDI_IWAIT) != 0 ? "iwait" : "yielding") - #ifndef __rtems__ static void synch_setup(void *dummy); SYSINIT(synch_setup, SI_SUB_KICK_SCHEDULER, SI_ORDER_FIRST, synch_setup, @@ -164,11 +157,9 @@ _sleep(void *ident, struct lock_object *lock, int priority, "Sleeping on \"%s\"", wmesg); KASSERT(sbt != 0 || mtx_owned(&Giant) || lock != NULL, ("sleeping without a lock")); + KASSERT(ident != NULL, ("_sleep: NULL ident")); #ifndef __rtems__ - KASSERT(p != NULL, ("msleep1")); -#endif /* __rtems__ */ - KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); -#ifndef __rtems__ + KASSERT(TD_IS_RUNNING(td), ("_sleep: curthread not running")); if (priority & PDROP) KASSERT(lock != NULL && lock != &Giant.lock_object, ("PDROP requires a non-Giant lock")); @@ -179,7 +170,7 @@ _sleep(void *ident, struct lock_object *lock, int priority, class = NULL; #ifndef __rtems__ - if (SCHEDULER_STOPPED()) { + if (SCHEDULER_STOPPED_TD(td)) { if (lock != NULL && priority & PDROP) class->lc_unlock(lock); return (0); @@ -277,10 +268,10 @@ msleep_spin_sbt(void *ident, struct mtx *mtx, const char *wmesg, td = curthread; p = td->td_proc; KASSERT(mtx != NULL, ("sleeping without a mutex")); - KASSERT(p != NULL, ("msleep1")); - KASSERT(ident != NULL && TD_IS_RUNNING(td), ("msleep")); + KASSERT(ident != NULL, ("msleep_spin_sbt: NULL ident")); + KASSERT(TD_IS_RUNNING(td), ("msleep_spin_sbt: curthread not running")); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return (0); sleepq_lock(ident); @@ -447,7 +438,7 @@ mi_switch(int flags, struct thread *newtd) */ if (kdb_active) kdb_switch(); - if (SCHEDULER_STOPPED()) + if (SCHEDULER_STOPPED_TD(td)) return; if (flags & SW_VOL) { td->td_ru.ru_nvcsw++; @@ -473,20 +464,12 @@ mi_switch(int flags, struct thread *newtd) PCPU_SET(switchticks, ticks); CTR4(KTR_PROC, "mi_switch: old thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); -#if (KTR_COMPILE & KTR_SCHED) != 0 - if (TD_IS_IDLETHREAD(td)) - KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "idle", - "prio:%d", td->td_priority); - else - KTR_STATE3(KTR_SCHED, "thread", sched_tdname(td), KTDSTATE(td), - "prio:%d", td->td_priority, "wmesg:\"%s\"", td->td_wmesg, - "lockname:\"%s\"", td->td_lockname); +#ifdef KDTRACE_HOOKS + if ((flags & SW_PREEMPT) != 0 || ((flags & SW_INVOL) != 0 && + (flags & SW_TYPE_MASK) == SWT_NEEDRESCHED)) + SDT_PROBE0(sched, , , preempt); #endif - SDT_PROBE0(sched, , , preempt); sched_switch(td, newtd, flags); - KTR_STATE1(KTR_SCHED, "thread", sched_tdname(td), "running", - "prio:%d", td->td_priority); - CTR4(KTR_PROC, "mi_switch: new thread %ld (td_sched %p, pid %ld, %s)", td->td_tid, td_get_sched(td), td->td_proc->p_pid, td->td_name); diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 9553f4c7..0f003907 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -634,7 +634,8 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) if (oidp == NULL) return(EINVAL); if ((oidp->oid_kind & CTLFLAG_DYN) == 0) { - printf("can't remove non-dynamic nodes!\n"); + printf("Warning: can't remove non-dynamic nodes (%s)!\n", + oidp->oid_name); return (EINVAL); } /* diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c index 95111932..2fc7092f 100644 --- a/freebsd/sys/kern/kern_time.c +++ b/freebsd/sys/kern/kern_time.c @@ -90,6 +90,9 @@ static uma_zone_t itimer_zone = NULL; static int settime(struct thread *, struct timeval *); #endif /* __rtems__ */ static void timevalfix(struct timeval *); +static int user_clock_nanosleep(struct thread *td, clockid_t clock_id, + int flags, const struct timespec *ua_rqtp, + struct timespec *ua_rmtp); #ifndef __rtems__ static void itimer_start(void); @@ -397,6 +400,11 @@ sys_clock_settime(struct thread *td, struct clock_settime_args *uap) return (kern_clock_settime(td, uap->clock_id, &ats)); } +static int allow_insane_settime = 0; +SYSCTL_INT(_debug, OID_AUTO, allow_insane_settime, CTLFLAG_RWTUN, + &allow_insane_settime, 0, + "do not perform possibly restrictive checks on settime(2) args"); + int kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) { @@ -410,6 +418,8 @@ kern_clock_settime(struct thread *td, clockid_t clock_id, struct timespec *ats) if (ats->tv_nsec < 0 || ats->tv_nsec >= 1000000000 || ats->tv_sec < 0) return (EINVAL); + if (!allow_insane_settime && ats->tv_sec > 9999ULL * 366 * 24 * 60 * 60) + return (EINVAL); /* XXX Don't convert nsec->usec and back */ TIMESPEC_TO_TIMEVAL(&atv, ats); error = settime(td, &atv); @@ -487,47 +497,95 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) } #endif +int +kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) +{ + + return (kern_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, rqt, + rmt)); +} + static uint8_t nanowait[MAXCPU]; int -kern_nanosleep(struct thread *td, struct timespec *rqt, struct timespec *rmt) +kern_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, + const struct timespec *rqt, struct timespec *rmt) { - struct timespec ts; + struct timespec ts, now; sbintime_t sbt, sbtt, prec, tmp; time_t over; int error; + bool is_abs_real; if (rqt->tv_nsec < 0 || rqt->tv_nsec >= 1000000000) return (EINVAL); - if (rqt->tv_sec < 0 || (rqt->tv_sec == 0 && rqt->tv_nsec == 0)) - return (0); - ts = *rqt; - if (ts.tv_sec > INT32_MAX / 2) { - over = ts.tv_sec - INT32_MAX / 2; - ts.tv_sec -= over; - } else - over = 0; - tmp = tstosbt(ts); - prec = tmp; - prec >>= tc_precexp; - if (TIMESEL(&sbt, tmp)) - sbt += tc_tick_sbt; - sbt += tmp; - error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", - sbt, prec, C_ABSOLUTE); + if ((flags & ~TIMER_ABSTIME) != 0) + return (EINVAL); + switch (clock_id) { + case CLOCK_REALTIME: + case CLOCK_REALTIME_PRECISE: + case CLOCK_REALTIME_FAST: + case CLOCK_SECOND: + is_abs_real = (flags & TIMER_ABSTIME) != 0; + break; + case CLOCK_MONOTONIC: + case CLOCK_MONOTONIC_PRECISE: + case CLOCK_MONOTONIC_FAST: + case CLOCK_UPTIME: + case CLOCK_UPTIME_PRECISE: + case CLOCK_UPTIME_FAST: + is_abs_real = false; + break; + case CLOCK_VIRTUAL: + case CLOCK_PROF: + case CLOCK_PROCESS_CPUTIME_ID: + return (ENOTSUP); + case CLOCK_THREAD_CPUTIME_ID: + default: + return (EINVAL); + } + do { + ts = *rqt; + if ((flags & TIMER_ABSTIME) != 0) { + if (is_abs_real) + td->td_rtcgen = + atomic_load_acq_int(&rtc_generation); + error = kern_clock_gettime(td, clock_id, &now); + KASSERT(error == 0, ("kern_clock_gettime: %d", error)); + timespecsub(&ts, &now); + } + if (ts.tv_sec < 0 || (ts.tv_sec == 0 && ts.tv_nsec == 0)) { + error = EWOULDBLOCK; + break; + } + if (ts.tv_sec > INT32_MAX / 2) { + over = ts.tv_sec - INT32_MAX / 2; + ts.tv_sec -= over; + } else + over = 0; + tmp = tstosbt(ts); + prec = tmp; + prec >>= tc_precexp; + if (TIMESEL(&sbt, tmp)) + sbt += tc_tick_sbt; + sbt += tmp; + error = tsleep_sbt(&nanowait[curcpu], PWAIT | PCATCH, "nanslp", + sbt, prec, C_ABSOLUTE); + } while (error == 0 && is_abs_real && td->td_rtcgen == 0); + td->td_rtcgen = 0; if (error != EWOULDBLOCK) { + TIMESEL(&sbtt, tmp); + if (sbtt >= sbt) + return (0); if (error == ERESTART) error = EINTR; - TIMESEL(&sbtt, tmp); - if (rmt != NULL) { + if ((flags & TIMER_ABSTIME) == 0 && rmt != NULL) { ts = sbttots(sbt - sbtt); ts.tv_sec += over; if (ts.tv_sec < 0) timespecclear(&ts); *rmt = ts; } - if (sbtt >= sbt) - return (0); return (error); } return (0); @@ -543,21 +601,48 @@ struct nanosleep_args { int sys_nanosleep(struct thread *td, struct nanosleep_args *uap) { + + return (user_clock_nanosleep(td, CLOCK_REALTIME, TIMER_RELTIME, + uap->rqtp, uap->rmtp)); +} + +#ifndef _SYS_SYSPROTO_H_ +struct clock_nanosleep_args { + clockid_t clock_id; + int flags; + struct timespec *rqtp; + struct timespec *rmtp; +}; +#endif +/* ARGSUSED */ +int +sys_clock_nanosleep(struct thread *td, struct clock_nanosleep_args *uap) +{ + int error; + + error = user_clock_nanosleep(td, uap->clock_id, uap->flags, uap->rqtp, + uap->rmtp); + return (kern_posix_error(td, error)); +} + +static int +user_clock_nanosleep(struct thread *td, clockid_t clock_id, int flags, + const struct timespec *ua_rqtp, struct timespec *ua_rmtp) +{ struct timespec rmt, rqt; int error; - error = copyin(uap->rqtp, &rqt, sizeof(rqt)); + error = copyin(ua_rqtp, &rqt, sizeof(rqt)); if (error) return (error); - - if (uap->rmtp && - !useracc((caddr_t)uap->rmtp, sizeof(rmt), VM_PROT_WRITE)) - return (EFAULT); - error = kern_nanosleep(td, &rqt, &rmt); - if (error && uap->rmtp) { + if (ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0 && + !useracc(ua_rmtp, sizeof(rmt), VM_PROT_WRITE)) + return (EFAULT); + error = kern_clock_nanosleep(td, clock_id, flags, &rqt, &rmt); + if (error == EINTR && ua_rmtp != NULL && (flags & TIMER_ABSTIME) == 0) { int error2; - error2 = copyout(&rmt, uap->rmtp, sizeof(rmt)); + error2 = copyout(&rmt, ua_rmtp, sizeof(rmt)); if (error2) error = error2; } diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 73b55338..17d23ba6 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -1345,9 +1345,12 @@ again: if (cc_exec_curr(cc, direct) == c) { /* * Succeed we to stop it or not, we must clear the - * active flag - this is what API users expect. + * active flag - this is what API users expect. If we're + * draining and the callout is currently executing, first wait + * until it finishes. */ - c->c_flags &= ~CALLOUT_ACTIVE; + if ((flags & CS_DRAIN) == 0) + c->c_flags &= ~CALLOUT_ACTIVE; if ((flags & CS_DRAIN) != 0) { /* @@ -1419,6 +1422,7 @@ again: &cc->cc_lock, "codrain", 0); #endif /* __rtems__ */ } + c->c_flags &= ~CALLOUT_ACTIVE; } else if (use_lock && !cc_exec_cancel(cc, direct) && (drain == NULL)) { diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index e8339c93..cf94de2f 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -1117,7 +1117,7 @@ devclass_driver_added(devclass_t dc, driver_t *driver) int i; /* - * Call BUS_DRIVER_ADDED for any existing busses in this class. + * Call BUS_DRIVER_ADDED for any existing buses in this class. */ for (i = 0; i < dc->maxunit; i++) if (dc->devices[i] && device_is_attached(dc->devices[i])) @@ -3309,7 +3309,7 @@ resource_list_delete(struct resource_list *rl, int type, int rid) /** * @brief Allocate a reserved resource * - * This can be used by busses to force the allocation of resources + * This can be used by buses to force the allocation of resources * that are always active in the system even if they are not allocated * by a driver (e.g. PCI BARs). This function is usually called when * adding a new child to the bus. The resource is allocated from the @@ -3688,7 +3688,7 @@ bus_generic_probe(device_t dev) * only call the identify routines of eligible drivers * when this routine is called. Drivers for later * passes should have their identify routines called - * on early-pass busses during BUS_NEW_PASS(). + * on early-pass buses during BUS_NEW_PASS(). */ if (dl->pass > bus_current_pass) continue; diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c index 5aba8941..7c62bca0 100644 --- a/freebsd/sys/kern/subr_lock.c +++ b/freebsd/sys/kern/subr_lock.c @@ -58,6 +58,9 @@ __FBSDID("$FreeBSD$"); #include <machine/cpufunc.h> +SDT_PROVIDER_DEFINE(lock); +SDT_PROBE_DEFINE1(lock, , , starvation, "u_int"); + CTASSERT(LOCK_CLASS_MAX == 15); struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { @@ -116,32 +119,56 @@ lock_destroy(struct lock_object *lock) } #ifndef __rtems__ +static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); +static SYSCTL_NODE(_debug_lock, OID_AUTO, delay, CTLFLAG_RD, NULL, + "lock delay"); + +static u_int __read_mostly starvation_limit = 131072; +SYSCTL_INT(_debug_lock_delay, OID_AUTO, starvation_limit, CTLFLAG_RW, + &starvation_limit, 0, ""); + +static u_int __read_mostly restrict_starvation = 0; +SYSCTL_INT(_debug_lock_delay, OID_AUTO, restrict_starvation, CTLFLAG_RW, + &restrict_starvation, 0, ""); + void lock_delay(struct lock_delay_arg *la) { - u_int i, delay, backoff, min, max; struct lock_delay_config *lc = la->config; + u_int i; - delay = la->delay; + la->delay <<= 1; + if (__predict_false(la->delay > lc->max)) + la->delay = lc->max; - if (delay == 0) - delay = lc->initial; - else { - delay += lc->step; - max = lc->max; - if (delay > max) - delay = max; + for (i = la->delay; i > 0; i--) + cpu_spinwait(); + + la->spin_cnt += la->delay; + if (__predict_false(la->spin_cnt > starvation_limit)) { + SDT_PROBE1(lock, , , starvation, la->delay); + if (restrict_starvation) + la->delay = lc->base; } +} - backoff = cpu_ticks() % delay; - min = lc->min; - if (backoff < min) - backoff = min; - for (i = 0; i < backoff; i++) - cpu_spinwait(); +static u_int +lock_roundup_2(u_int val) +{ + u_int res; + + for (res = 1; res <= val; res <<= 1) + continue; + + return (res); +} - la->delay = delay; - la->spin_cnt += backoff; +void +lock_delay_default_init(struct lock_delay_config *lc) +{ + + lc->base = lock_roundup_2(mp_ncpus) / 4; + lc->max = lc->base * 1024; } #endif /* __rtems__ */ @@ -227,7 +254,7 @@ struct lock_prof_cpu { struct lock_prof_cpu *lp_cpu[MAXCPU]; -volatile int lock_prof_enable = 0; +volatile int __read_mostly lock_prof_enable; static volatile int lock_prof_resetting; #define LPROF_SBUF_SIZE 256 @@ -669,7 +696,6 @@ out: critical_exit(); } -static SYSCTL_NODE(_debug, OID_AUTO, lock, CTLFLAG_RD, NULL, "lock debugging"); static SYSCTL_NODE(_debug_lock, OID_AUTO, prof, CTLFLAG_RD, NULL, "lock profiling"); SYSCTL_INT(_debug_lock_prof, OID_AUTO, skipspin, CTLFLAG_RW, diff --git a/freebsd/sys/kern/subr_prf.c b/freebsd/sys/kern/subr_prf.c index 3aab5aaa..ffc8165f 100644 --- a/freebsd/sys/kern/subr_prf.c +++ b/freebsd/sys/kern/subr_prf.c @@ -86,6 +86,14 @@ __FBSDID("$FreeBSD$"); #include <stdarg.h> #endif +/* + * This is needed for sbuf_putbuf() when compiled into userland. Due to the + * shared nature of this file, it's the only place to put it. + */ +#ifndef _KERNEL +#include <stdio.h> +#endif + #ifdef _KERNEL #ifndef __rtems__ @@ -437,6 +445,23 @@ vprintf(const char *fmt, va_list ap) #ifndef __rtems__ static void +prf_putbuf(char *bufr, int flags, int pri) +{ + + if (flags & TOLOG) + msglogstr(bufr, pri, /*filter_cr*/1); + + if (flags & TOCONS) { + if ((panicstr == NULL) && (constty != NULL)) + msgbuf_addstr(&consmsgbuf, -1, + bufr, /*filter_cr*/ 0); + + if ((constty == NULL) ||(always_console_output)) + cnputs(bufr); + } +} + +static void putbuf(int c, struct putchar_arg *ap) { /* Check if no console output buffer was provided. */ @@ -457,18 +482,7 @@ putbuf(int c, struct putchar_arg *ap) /* Check if the buffer needs to be flushed. */ if (ap->remain == 2 || c == '\n') { - - if (ap->flags & TOLOG) - msglogstr(ap->p_bufr, ap->pri, /*filter_cr*/1); - - if (ap->flags & TOCONS) { - if ((panicstr == NULL) && (constty != NULL)) - msgbuf_addstr(&consmsgbuf, -1, - ap->p_bufr, /*filter_cr*/ 0); - - if ((constty == NULL) ||(always_console_output)) - cnputs(ap->p_bufr); - } + prf_putbuf(ap->p_bufr, ap->flags, ap->pri); ap->p_next = ap->p_bufr; ap->remain = ap->n_bufr; @@ -1259,4 +1273,20 @@ counted_warning(unsigned *counter, const char *msg) } } #endif + +#ifdef _KERNEL +void +sbuf_putbuf(struct sbuf *sb) +{ + + prf_putbuf(sbuf_data(sb), TOLOG | TOCONS, -1); +} +#else +void +sbuf_putbuf(struct sbuf *sb) +{ + + printf("%s", sbuf_data(sb)); +} +#endif #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/subr_sleepqueue.c b/freebsd/sys/kern/subr_sleepqueue.c index 90023066..f90c7102 100644 --- a/freebsd/sys/kern/subr_sleepqueue.c +++ b/freebsd/sys/kern/subr_sleepqueue.c @@ -29,7 +29,7 @@ /* * Implementation of sleep queues used to hold queue of threads blocked on - * a wait channel. Sleep queues different from turnstiles in that wait + * a wait channel. Sleep queues are different from turnstiles in that wait * channels are not owned by anyone, so there is no priority propagation. * Sleep queues can also provide a timeout and can also be interrupted by * signals. That said, there are several similarities between the turnstile @@ -39,7 +39,7 @@ * a linked list of queues. An individual queue is located by using a hash * to pick a chain, locking the chain, and then walking the chain searching * for the queue. This means that a wait channel object does not need to - * embed it's queue head just as locks do not embed their turnstile queue + * embed its queue head just as locks do not embed their turnstile queue * head. Threads also carry around a sleep queue that they lend to the * wait channel when blocking. Just as in turnstiles, the queue includes * a free list of the sleep queues of other threads blocked on the same @@ -81,6 +81,9 @@ __FBSDID("$FreeBSD$"); #include <sys/sleepqueue.h> #include <sys/stack.h> #include <sys/sysctl.h> +#include <sys/time.h> + +#include <machine/atomic.h> #include <vm/uma.h> @@ -107,7 +110,7 @@ __FBSDID("$FreeBSD$"); #define SC_LOOKUP(wc) &sleepq_chains[SC_HASH(wc)] #define NR_SLEEPQS 2 /* - * There two different lists of sleep queues. Both lists are connected + * There are two different lists of sleep queues. Both lists are connected * via the sq_hash entries. The first list is the sleep queue chain list * that a sleep queue is on when it is attached to a wait channel. The * second list is the free list hung off of a sleep queue that is attached @@ -198,7 +201,7 @@ init_sleepqueue_profiling(void) for (i = 0; i < SC_TABLESIZE; i++) { snprintf(chain_name, sizeof(chain_name), "%u", i); - chain_oid = SYSCTL_ADD_NODE(NULL, + chain_oid = SYSCTL_ADD_NODE(NULL, SYSCTL_STATIC_CHILDREN(_debug_sleepq_chains), OID_AUTO, chain_name, CTLFLAG_RD, NULL, "sleepq chain stats"); SYSCTL_ADD_UINT(NULL, SYSCTL_CHILDREN(chain_oid), OID_AUTO, @@ -233,7 +236,7 @@ init_sleepqueues(void) #else NULL, NULL, sleepq_init, NULL, UMA_ALIGN_CACHE, 0); #endif - + #ifndef __rtems__ thread0.td_sleepqueue = sleepq_alloc(); #endif /* __rtems__ */ @@ -518,6 +521,7 @@ sleepq_catch_signals(void *wchan, int pri) struct sigacts *ps; int sig, ret; + ret = 0; td = curthread; p = curproc; sc = SC_LOOKUP(wchan); @@ -531,53 +535,65 @@ sleepq_catch_signals(void *wchan, int pri) } /* - * See if there are any pending signals for this thread. If not - * we can switch immediately. Otherwise do the signal processing - * directly. + * See if there are any pending signals or suspension requests for this + * thread. If not, we can switch immediately. */ thread_lock(td); - if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) == 0) { - sleepq_switch(wchan, pri); - return (0); - } - thread_unlock(td); - mtx_unlock_spin(&sc->sc_lock); - CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", - (void *)td, (long)p->p_pid, td->td_name); - PROC_LOCK(p); - ps = p->p_sigacts; - mtx_lock(&ps->ps_mtx); - sig = cursig(td); - if (sig == -1) { - mtx_unlock(&ps->ps_mtx); - KASSERT((td->td_flags & TDF_SBDRY) != 0, ("lost TDF_SBDRY")); - KASSERT(TD_SBDRY_INTR(td), - ("lost TDF_SERESTART of TDF_SEINTR")); - KASSERT((td->td_flags & (TDF_SEINTR | TDF_SERESTART)) != - (TDF_SEINTR | TDF_SERESTART), - ("both TDF_SEINTR and TDF_SERESTART")); - ret = TD_SBDRY_ERRNO(td); - } else if (sig == 0) { - mtx_unlock(&ps->ps_mtx); - ret = thread_suspend_check(1); - MPASS(ret == 0 || ret == EINTR || ret == ERESTART); - } else { - if (SIGISMEMBER(ps->ps_sigintr, sig)) - ret = EINTR; - else - ret = ERESTART; - mtx_unlock(&ps->ps_mtx); + if ((td->td_flags & (TDF_NEEDSIGCHK | TDF_NEEDSUSPCHK)) != 0) { + thread_unlock(td); + mtx_unlock_spin(&sc->sc_lock); + CTR3(KTR_PROC, "sleepq catching signals: thread %p (pid %ld, %s)", + (void *)td, (long)p->p_pid, td->td_name); + PROC_LOCK(p); + /* + * Check for suspension first. Checking for signals and then + * suspending could result in a missed signal, since a signal + * can be delivered while this thread is suspended. + */ + if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) { + ret = thread_suspend_check(1); + MPASS(ret == 0 || ret == EINTR || ret == ERESTART); + if (ret != 0) { + PROC_UNLOCK(p); + mtx_lock_spin(&sc->sc_lock); + thread_lock(td); + goto out; + } + } + if ((td->td_flags & TDF_NEEDSIGCHK) != 0) { + ps = p->p_sigacts; + mtx_lock(&ps->ps_mtx); + sig = cursig(td); + if (sig == -1) { + mtx_unlock(&ps->ps_mtx); + KASSERT((td->td_flags & TDF_SBDRY) != 0, + ("lost TDF_SBDRY")); + KASSERT(TD_SBDRY_INTR(td), + ("lost TDF_SERESTART of TDF_SEINTR")); + KASSERT((td->td_flags & + (TDF_SEINTR | TDF_SERESTART)) != + (TDF_SEINTR | TDF_SERESTART), + ("both TDF_SEINTR and TDF_SERESTART")); + ret = TD_SBDRY_ERRNO(td); + } else if (sig != 0) { + ret = SIGISMEMBER(ps->ps_sigintr, sig) ? + EINTR : ERESTART; + mtx_unlock(&ps->ps_mtx); + } else { + mtx_unlock(&ps->ps_mtx); + } + } + /* + * Lock the per-process spinlock prior to dropping the PROC_LOCK + * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and + * thread_lock() are currently held in tdsendsignal(). + */ + PROC_SLOCK(p); + mtx_lock_spin(&sc->sc_lock); + PROC_UNLOCK(p); + thread_lock(td); + PROC_SUNLOCK(p); } - /* - * Lock the per-process spinlock prior to dropping the PROC_LOCK - * to avoid a signal delivery race. PROC_LOCK, PROC_SLOCK, and - * thread_lock() are currently held in tdsendsignal(). - */ - PROC_SLOCK(p); - mtx_lock_spin(&sc->sc_lock); - PROC_UNLOCK(p); - thread_lock(td); - PROC_SUNLOCK(p); if (ret == 0) { sleepq_switch(wchan, pri); return (0); @@ -616,13 +632,14 @@ sleepq_switch(void *wchan, int pri) struct sleepqueue_chain *sc; struct sleepqueue *sq; struct thread *td; + bool rtc_changed; td = curthread; sc = SC_LOOKUP(wchan); mtx_assert(&sc->sc_lock, MA_OWNED); THREAD_LOCK_ASSERT(td, MA_OWNED); - /* + /* * If we have a sleep queue, then we've already been woken up, so * just return. */ @@ -635,8 +652,26 @@ sleepq_switch(void *wchan, int pri) * If TDF_TIMEOUT is set, then our sleep has been timed out * already but we are still on the sleep queue, so dequeue the * thread and return. + * + * Do the same if the real-time clock has been adjusted since this + * thread calculated its timeout based on that clock. This handles + * the following race: + * - The Ts thread needs to sleep until an absolute real-clock time. + * It copies the global rtc_generation into curthread->td_rtcgen, + * reads the RTC, and calculates a sleep duration based on that time. + * See umtxq_sleep() for an example. + * - The Tc thread adjusts the RTC, bumps rtc_generation, and wakes + * threads that are sleeping until an absolute real-clock time. + * See tc_setclock() and the POSIX specification of clock_settime(). + * - Ts reaches the code below. It holds the sleepqueue chain lock, + * so Tc has finished waking, so this thread must test td_rtcgen. + * (The declaration of td_rtcgen refers to this comment.) */ - if (td->td_flags & TDF_TIMEOUT) { + rtc_changed = td->td_rtcgen != 0 && td->td_rtcgen != rtc_generation; + if ((td->td_flags & TDF_TIMEOUT) || rtc_changed) { + if (rtc_changed) { + td->td_rtcgen = 0; + } MPASS(TD_ON_SLEEPQ(td)); sq = sleepq_lookup(wchan); if (sleepq_resume_thread(sq, td, 0)) { @@ -649,7 +684,7 @@ sleepq_switch(void *wchan, int pri) #endif } mtx_unlock_spin(&sc->sc_lock); - return; + return; } #ifdef SLEEPQUEUE_PROFILING if (prof_enabled) @@ -1126,6 +1161,13 @@ sleepq_signal(void *wchan, int flags, int pri, int queue) return (wakeup_swapper); } +static bool +match_any(struct thread *td __unused) +{ + + return (true); +} + /* * Resume all threads sleeping on a specified wait channel. */ @@ -1133,8 +1175,6 @@ int sleepq_broadcast(void *wchan, int flags, int pri, int queue) { struct sleepqueue *sq; - struct thread *td, *tdn; - int wakeup_swapper; CTR2(KTR_PROC, "sleepq_broadcast(%p, %d)", wchan, flags); KASSERT(wchan != NULL, ("%s: invalid NULL wait channel", __func__)); @@ -1145,18 +1185,33 @@ sleepq_broadcast(void *wchan, int flags, int pri, int queue) KASSERT(sq->sq_type == (flags & SLEEPQ_TYPE), ("%s: mismatch between sleep/wakeup and cv_*", __func__)); + return (sleepq_remove_matching(sq, queue, match_any, pri)); +} + +/* + * Resume threads on the sleep queue that match the given predicate. + */ +int +sleepq_remove_matching(struct sleepqueue *sq, int queue, + bool (*matches)(struct thread *), int pri) +{ + struct thread *td, *tdn; + int wakeup_swapper; + /* - * Resume all blocked threads on the sleep queue. The last thread will - * be given ownership of sq and may re-enqueue itself before - * sleepq_resume_thread() returns, so we must cache the "next" queue - * item at the beginning of the final iteration. + * The last thread will be given ownership of sq and may + * re-enqueue itself before sleepq_resume_thread() returns, + * so we must cache the "next" queue item at the beginning + * of the final iteration. */ wakeup_swapper = 0; TAILQ_FOREACH_SAFE(td, &sq->sq_blocked[queue], td_slpq, tdn) { thread_lock(td); - wakeup_swapper |= sleepq_resume_thread(sq, td, pri); + if (matches(td)) + wakeup_swapper |= sleepq_resume_thread(sq, td, pri); thread_unlock(td); } + return (wakeup_swapper); } @@ -1339,6 +1394,32 @@ sleepq_abort(struct thread *td, int intrval) } #endif /* __rtems__ */ +void +sleepq_chains_remove_matching(bool (*matches)(struct thread *)) +{ + struct sleepqueue_chain *sc; + struct sleepqueue *sq; + int i, wakeup_swapper; + + wakeup_swapper = 0; + for (sc = &sleepq_chains[0]; sc < sleepq_chains + SC_TABLESIZE; ++sc) { + if (LIST_EMPTY(&sc->sc_queues)) { + continue; + } + mtx_lock_spin(&sc->sc_lock); + LIST_FOREACH(sq, &sc->sc_queues, sq_hash) { + for (i = 0; i < NR_SLEEPQS; ++i) { + wakeup_swapper |= sleepq_remove_matching(sq, i, + matches, 0); + } + } + mtx_unlock_spin(&sc->sc_lock); + } + if (wakeup_swapper) { + kick_proc0(); + } +} + /* * Prints the stacks of all threads presently sleeping on wchan/queue to * the sbuf sb. Sets count_stacks_printed to the number of stacks actually diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c index 5ef8683c..c739ccf3 100644 --- a/freebsd/sys/kern/subr_taskqueue.c +++ b/freebsd/sys/kern/subr_taskqueue.c @@ -522,6 +522,23 @@ task_is_running(struct taskqueue *queue, struct task *task) return (0); } +/* + * Only use this function in single threaded contexts. It returns + * non-zero if the given task is either pending or running. Else the + * task is idle and can be queued again or freed. + */ +int +taskqueue_poll_is_busy(struct taskqueue *queue, struct task *task) +{ + int retval; + + TQ_LOCK(queue); + retval = task->ta_pending > 0 || task_is_running(queue, task); + TQ_UNLOCK(queue); + + return (retval); +} + static int taskqueue_cancel_locked(struct taskqueue *queue, struct task *task, u_int *pendp) diff --git a/freebsd/sys/kern/subr_uio.c b/freebsd/sys/kern/subr_uio.c index f5dc76e7..3a34e521 100644 --- a/freebsd/sys/kern/subr_uio.c +++ b/freebsd/sys/kern/subr_uio.c @@ -492,10 +492,11 @@ copyout_map(struct thread *td, vm_offset_t *addr, size_t sz) /* round size up to page boundary */ size = (vm_size_t)round_page(sz); - - error = vm_mmap(&vms->vm_map, addr, size, VM_PROT_READ | VM_PROT_WRITE, - VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0); - + if (size == 0) + return (EINVAL); + error = vm_mmap_object(&vms->vm_map, addr, size, VM_PROT_READ | + VM_PROT_WRITE, VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, NULL, 0, + FALSE, td); return (error); } diff --git a/freebsd/sys/kern/subr_unit.c b/freebsd/sys/kern/subr_unit.c index 678916f8..cdbe5343 100644 --- a/freebsd/sys/kern/subr_unit.c +++ b/freebsd/sys/kern/subr_unit.c @@ -218,7 +218,7 @@ ub_full(struct unrb *ub, int len) * Consistency check function. * * Checks the internal consistency as well as we can. - * + * * Called at all boundaries of this API. */ static void @@ -242,7 +242,7 @@ check_unrhdr(struct unrhdr *uh, int line) w = 0; bit_count(ub->map, 0, up->len, &w); y += w; - } else if (up->ptr != NULL) + } else if (up->ptr != NULL) y += up->len; } KASSERT (y == uh->busy, @@ -377,7 +377,7 @@ is_bitmap(struct unrhdr *uh, struct unr *up) /* * Look for sequence of items which can be combined into a bitmap, if * multiple are present, take the one which saves most memory. - * + * * Return (1) if a sequence was found to indicate that another call * might be able to do more. Return (0) if we found no suitable sequence. * @@ -593,7 +593,7 @@ alloc_unrl(struct unrhdr *uh) } /* - * We can always allocate from the first list element, so if we have + * We can always allocate from the first list element, so if we have * nothing on the list, we must have run out of unit numbers. */ if (up == NULL) @@ -805,7 +805,7 @@ free_unrl(struct unrhdr *uh, u_int item, void **p1, void **p2) /* Handle bitmap items */ if (is_bitmap(uh, up)) { ub = up->ptr; - + KASSERT(bit_test(ub->map, item) != 0, ("UNR: Freeing free item %d (bitmap)\n", item)); bit_clear(ub->map, item); @@ -911,7 +911,7 @@ print_unr(struct unrhdr *uh, struct unr *up) for (x = 0; x < up->len; x++) { if (bit_test(ub->map, x)) printf("#"); - else + else printf(" "); } printf("]\n"); @@ -988,7 +988,7 @@ main(int argc, char **argv) long count = 10000; /* Number of unrs to test */ long reps = 1, m; int ch; - u_int i, x, j; + u_int i, j; verbose = false; @@ -1001,7 +1001,7 @@ main(int argc, char **argv) usage(argv); exit(2); } - + break; case 'v': verbose = true; @@ -1028,7 +1028,6 @@ main(int argc, char **argv) printf("sizeof(struct unrb) %zu\n", sizeof(struct unrb)); printf("sizeof(struct unrhdr) %zu\n", sizeof(struct unrhdr)); printf("NBITS %lu\n", (unsigned long)NBITS); - x = 1; for (m = 0; m < count * reps; m++) { j = random(); i = (j >> 1) % count; diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index d6b3d15b..c3bf19f5 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -232,39 +232,37 @@ struct pread_args { }; #endif int -sys_pread(td, uap) - struct thread *td; - struct pread_args *uap; +sys_pread(struct thread *td, struct pread_args *uap) +{ + + return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); +} + +int +kern_pread(struct thread *td, int fd, void *buf, size_t nbyte, off_t offset) { struct uio auio; struct iovec aiov; int error; - if (uap->nbyte > IOSIZE_MAX) + if (nbyte > IOSIZE_MAX) return (EINVAL); - aiov.iov_base = uap->buf; - aiov.iov_len = uap->nbyte; + aiov.iov_base = buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_resid = uap->nbyte; + auio.uio_resid = nbyte; auio.uio_segflg = UIO_USERSPACE; - error = kern_preadv(td, uap->fd, &auio, uap->offset); - return(error); + error = kern_preadv(td, fd, &auio, offset); + return (error); } #if defined(COMPAT_FREEBSD6) int -freebsd6_pread(td, uap) - struct thread *td; - struct freebsd6_pread_args *uap; +freebsd6_pread(struct thread *td, struct freebsd6_pread_args *uap) { - struct pread_args oargs; - oargs.fd = uap->fd; - oargs.buf = uap->buf; - oargs.nbyte = uap->nbyte; - oargs.offset = uap->offset; - return (sys_pread(td, &oargs)); + return (kern_pread(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } #endif @@ -348,7 +346,8 @@ kern_preadv(td, fd, auio, offset) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) error = ESPIPE; - else if (offset < 0 && fp->f_vnode->v_type != VCHR) + else if (offset < 0 && + (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) error = EINVAL; else error = dofileread(td, fd, fp, auio, offset, FOF_OFFSET); @@ -447,39 +446,38 @@ struct pwrite_args { }; #endif int -sys_pwrite(td, uap) - struct thread *td; - struct pwrite_args *uap; +sys_pwrite(struct thread *td, struct pwrite_args *uap) +{ + + return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); +} + +int +kern_pwrite(struct thread *td, int fd, const void *buf, size_t nbyte, + off_t offset) { struct uio auio; struct iovec aiov; int error; - if (uap->nbyte > IOSIZE_MAX) + if (nbyte > IOSIZE_MAX) return (EINVAL); - aiov.iov_base = (void *)(uintptr_t)uap->buf; - aiov.iov_len = uap->nbyte; + aiov.iov_base = (void *)(uintptr_t)buf; + aiov.iov_len = nbyte; auio.uio_iov = &aiov; auio.uio_iovcnt = 1; - auio.uio_resid = uap->nbyte; + auio.uio_resid = nbyte; auio.uio_segflg = UIO_USERSPACE; - error = kern_pwritev(td, uap->fd, &auio, uap->offset); + error = kern_pwritev(td, fd, &auio, offset); return(error); } #if defined(COMPAT_FREEBSD6) int -freebsd6_pwrite(td, uap) - struct thread *td; - struct freebsd6_pwrite_args *uap; +freebsd6_pwrite(struct thread *td, struct freebsd6_pwrite_args *uap) { - struct pwrite_args oargs; - oargs.fd = uap->fd; - oargs.buf = uap->buf; - oargs.nbyte = uap->nbyte; - oargs.offset = uap->offset; - return (sys_pwrite(td, &oargs)); + return (kern_pwrite(td, uap->fd, uap->buf, uap->nbyte, uap->offset)); } #endif @@ -563,7 +561,8 @@ kern_pwritev(td, fd, auio, offset) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) error = ESPIPE; - else if (offset < 0 && fp->f_vnode->v_type != VCHR) + else if (offset < 0 && + (fp->f_vnode == NULL || fp->f_vnode->v_type != VCHR)) error = EINVAL; else error = dofilewrite(td, fd, fp, auio, offset, FOF_OFFSET); diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c index 571dd3bd..74f4f0b4 100644 --- a/freebsd/sys/kern/uipc_mbuf.c +++ b/freebsd/sys/kern/uipc_mbuf.c @@ -171,7 +171,7 @@ CTASSERT(sizeof(struct m_ext) == 28); * plain pointer does. */ #ifdef INVARIANTS -static struct mbuf m_assertbuf; +static struct mbuf __used m_assertbuf; CTASSERT(sizeof(m_assertbuf.m_slist) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_stailq) == sizeof(m_assertbuf.m_next)); CTASSERT(sizeof(m_assertbuf.m_slistpkt) == sizeof(m_assertbuf.m_nextpkt)); @@ -1563,7 +1563,7 @@ m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) * Copy an mbuf chain into a uio limited by len if set. */ int -m_mbuftouio(struct uio *uio, struct mbuf *m, int len) +m_mbuftouio(struct uio *uio, const struct mbuf *m, int len) { int error, length, total; int progress = 0; diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c index fc5c8e8a..dd73910d 100644 --- a/freebsd/sys/kern/uipc_mbuf2.c +++ b/freebsd/sys/kern/uipc_mbuf2.c @@ -161,7 +161,7 @@ m_pulldown(struct mbuf *m, int off, int len, int *offp) * the target data is on <n, off>. * if we got enough data on the mbuf "n", we're done. */ - if ((off == 0 || offp) && len <= n->m_len - off && writable) + if ((off == 0 || offp) && len <= n->m_len - off) goto ok; /* diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index a7977141..b2a6460a 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -1049,6 +1049,11 @@ sbcut_internal(struct sockbuf *sb, int len) { struct mbuf *m, *next, *mfree; + KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", + __func__, len)); + KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", + __func__, len, sb->sb_ccc)); + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; mfree = NULL; diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 5f01844d..59a52115 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -165,18 +165,13 @@ static void filt_sordetach(struct knote *kn); static int filt_soread(struct knote *kn, long hint); static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); -static int filt_solisten(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); +static int filt_soempty(struct knote *kn, long hint); #ifdef __rtems__ static #endif /* __rtems__ */ fo_kqfilter_t soo_kqfilter; -static struct filterops solisten_filtops = { - .f_isfd = 1, - .f_detach = filt_sordetach, - .f_event = filt_solisten, -}; static struct filterops soread_filtops = { .f_isfd = 1, .f_detach = filt_sordetach, @@ -187,6 +182,11 @@ static struct filterops sowrite_filtops = { .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; +static struct filterops soempty_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_soempty, +}; so_gen_t so_gencnt; /* generation count for sockets */ @@ -2714,6 +2714,26 @@ sosetopt(struct socket *so, struct sockopt *sopt) #endif break; + case SO_TS_CLOCK: + error = sooptcopyin(sopt, &optval, sizeof optval, + sizeof optval); + if (error) + goto bad; + if (optval < 0 || optval > SO_TS_CLOCK_MAX) { + error = EINVAL; + goto bad; + } + so->so_ts_clock = optval; + break; + + case SO_MAX_PACING_RATE: + error = sooptcopyin(sopt, &val32, sizeof(val32), + sizeof(val32)); + if (error) + goto bad; + so->so_max_pacing_rate = val32; + break; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, @@ -2901,6 +2921,14 @@ integer: optval = so->so_incqlen; goto integer; + case SO_TS_CLOCK: + optval = so->so_ts_clock; + goto integer; + + case SO_MAX_PACING_RATE: + optval = so->so_max_pacing_rate; + goto integer; + default: if (V_socket_hhh[HHOOK_SOCKET_OPT]->hhh_nhooks > 0) error = hhook_run_socket(so, sopt, @@ -3108,16 +3136,17 @@ soo_kqfilter(struct file *fp, struct knote *kn) switch (kn->kn_filter) { case EVFILT_READ: - if (so->so_options & SO_ACCEPTCONN) - kn->kn_fop = &solisten_filtops; - else - kn->kn_fop = &soread_filtops; + kn->kn_fop = &soread_filtops; sb = &so->so_rcv; break; case EVFILT_WRITE: kn->kn_fop = &sowrite_filtops; sb = &so->so_snd; break; + case EVFILT_EMPTY: + kn->kn_fop = &soempty_filtops; + sb = &so->so_snd; + break; default: return (EINVAL); } @@ -3327,6 +3356,11 @@ filt_soread(struct knote *kn, long hint) struct socket *so; so = kn->kn_fp->f_data; + if (so->so_options & SO_ACCEPTCONN) { + kn->kn_data = so->so_qlen; + return (!TAILQ_EMPTY(&so->so_comp)); + + } SOCKBUF_LOCK_ASSERT(&so->so_rcv); kn->kn_data = sbavail(&so->so_rcv) - so->so_rcv.sb_ctl; @@ -3339,11 +3373,9 @@ filt_soread(struct knote *kn, long hint) if (kn->kn_sfflags & NOTE_LOWAT) { if (kn->kn_data >= kn->kn_sdata) - return 1; - } else { - if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) - return 1; - } + return (1); + } else if (sbavail(&so->so_rcv) >= so->so_rcv.sb_lowat) + return (1); /* This hook returning non-zero indicates an event, not error */ return (hhook_run_socket(so, NULL, HHOOK_FILT_SOREAD)); @@ -3388,14 +3420,19 @@ filt_sowrite(struct knote *kn, long hint) return (kn->kn_data >= so->so_snd.sb_lowat); } -/*ARGSUSED*/ static int -filt_solisten(struct knote *kn, long hint) +filt_soempty(struct knote *kn, long hint) { - struct socket *so = kn->kn_fp->f_data; + struct socket *so; + + so = kn->kn_fp->f_data; + SOCKBUF_LOCK_ASSERT(&so->so_snd); + kn->kn_data = sbused(&so->so_snd); - kn->kn_data = so->so_qlen; - return (!TAILQ_EMPTY(&so->so_comp)); + if (kn->kn_data == 0) + return (1); + else + return (0); } #ifndef __rtems__ diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index c6fdb26f..41701401 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -167,13 +167,19 @@ static int sys_socket(struct thread *td, struct socket_args *uap) { + + return (kern_socket(td, uap->domain, uap->type, uap->protocol)); +} + +int +kern_socket(struct thread *td, int domain, int type, int protocol) +{ struct socket *so; struct file *fp; - int fd, error, type, oflag, fflag; + int fd, error, oflag, fflag; - AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); + AUDIT_ARG_SOCKET(domain, type, protocol); - type = uap->type; oflag = 0; fflag = 0; if ((type & SOCK_CLOEXEC) != 0) { @@ -186,8 +192,7 @@ sys_socket(struct thread *td, struct socket_args *uap) } #ifdef MAC - error = mac_socket_check_create(td->td_ucred, uap->domain, type, - uap->protocol); + error = mac_socket_check_create(td->td_ucred, domain, type, protocol); if (error != 0) return (error); #endif @@ -195,8 +200,7 @@ sys_socket(struct thread *td, struct socket_args *uap) if (error != 0) return (error); /* An extra reference on `fp' has been held for us by falloc(). */ - error = socreate(uap->domain, &so, type, uap->protocol, - td->td_ucred, td); + error = socreate(domain, &so, type, protocol, td->td_ucred, td); if (error != 0) { fdclose(td, fp, fd); } else { @@ -329,13 +333,20 @@ sys_bindat(struct thread *td, struct bindat_args *uap) int sys_listen(struct thread *td, struct listen_args *uap) { + + return (kern_listen(td, uap->s, uap->backlog)); +} + +int +kern_listen(struct thread *td, int s, int backlog) +{ struct socket *so; struct file *fp; cap_rights_t rights; int error; - AUDIT_ARG_FD(uap->s); - error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_LISTEN), + AUDIT_ARG_FD(s); + error = getsock_cap(td, s, cap_rights_init(&rights, CAP_LISTEN), &fp, NULL, NULL); if (error == 0) { so = fp->f_data; @@ -343,10 +354,10 @@ sys_listen(struct thread *td, struct listen_args *uap) error = mac_socket_check_listen(td->td_ucred, so); if (error == 0) #endif - error = solisten(so, uap->backlog, td); + error = solisten(so, backlog, td); fdrop(fp, td); } - return(error); + return (error); } #ifdef __rtems__ int @@ -1581,17 +1592,24 @@ static int sys_shutdown(struct thread *td, struct shutdown_args *uap) { + + return (kern_shutdown(td, uap->s, uap->how)); +} + +int +kern_shutdown(struct thread *td, int s, int how) +{ struct socket *so; struct file *fp; cap_rights_t rights; int error; - AUDIT_ARG_FD(uap->s); - error = getsock_cap(td, uap->s, cap_rights_init(&rights, CAP_SHUTDOWN), + AUDIT_ARG_FD(s); + error = getsock_cap(td, s, cap_rights_init(&rights, CAP_SHUTDOWN), &fp, NULL, NULL); if (error == 0) { so = fp->f_data; - error = soshutdown(so, uap->how); + error = soshutdown(so, how); #ifndef __rtems__ /* * Previous versions did not return ENOTCONN, but 0 in diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c index 159de132..b3b55402 100644 --- a/freebsd/sys/kern/uipc_usrreq.c +++ b/freebsd/sys/kern/uipc_usrreq.c @@ -842,6 +842,9 @@ uipc_listen(struct socket *so, int backlog, struct thread *td) struct unpcb *unp; int error; + if (so->so_type != SOCK_STREAM && so->so_type != SOCK_SEQPACKET) + return (EOPNOTSUPP); + unp = sotounpcb(so); KASSERT(unp != NULL, ("uipc_listen: unp == NULL")); @@ -2058,6 +2061,7 @@ unp_internalize(struct mbuf **controlp, struct thread *td) struct filedescent *fde, **fdep, *fdev; struct file *fp; struct timeval *tv; + struct timespec *ts; int i, *fdp; void *data; socklen_t clen = control->m_len, datalen; @@ -2178,6 +2182,30 @@ unp_internalize(struct mbuf **controlp, struct thread *td) bintime(bt); break; + case SCM_REALTIME: + *controlp = sbcreatecontrol(NULL, sizeof(*ts), + SCM_REALTIME, SOL_SOCKET); + if (*controlp == NULL) { + error = ENOBUFS; + goto out; + } + ts = (struct timespec *) + CMSG_DATA(mtod(*controlp, struct cmsghdr *)); + nanotime(ts); + break; + + case SCM_MONOTONIC: + *controlp = sbcreatecontrol(NULL, sizeof(*ts), + SCM_MONOTONIC, SOL_SOCKET); + if (*controlp == NULL) { + error = ENOBUFS; + goto out; + } + ts = (struct timespec *) + CMSG_DATA(mtod(*controlp, struct cmsghdr *)); + nanouptime(ts); + break; + default: error = EINVAL; goto out; |