author:    Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-11-06 16:20:21 +0100
committer: Sebastian Huber <sebastian.huber@embedded-brains.de>  2013-11-11 10:08:08 +0100
commit:    66659ff1ad6831b0ea7425fa6ecd8a8687523658 (patch)
tree:      48e22b475fa8854128e0861a33fed6f78c8094b5 /freebsd/sys/kern
parent:    Define __GLOBL1() and __GLOBL() (diff)
download:  rtems-libbsd-66659ff1ad6831b0ea7425fa6ecd8a8687523658.tar.bz2
Update to FreeBSD 9.2
Diffstat (limited to 'freebsd/sys/kern')
28 files changed, 2673 insertions, 1104 deletions
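
One pattern that recurs in the kern_event.c hunks below is the conversion of struct filterops initializers from positional form to C99 designated initializers. The following is a minimal sketch of that idiom only, not the real FreeBSD code: the struct is cut down to the fields the diff names, and the stub callbacks are placeholders assumed for illustration.

#include <stddef.h>

struct knote;                              /* opaque for this sketch */

struct filterops {
	int	f_isfd;                    /* nonzero: ident is a file descriptor */
	int	(*f_attach)(struct knote *kn);
	void	(*f_detach)(struct knote *kn);
	int	(*f_event)(struct knote *kn, long hint);
};

/* Placeholder callbacks, standing in for the real filter functions. */
static void filt_kqdetach(struct knote *kn) { (void)kn; }
static int  filt_kqueue(struct knote *kn, long hint) { (void)kn; (void)hint; return (0); }

/* Old style: positional; silently misaligns if a member is added or reordered. */
static struct filterops kqread_filtops_old =
	{ 1, NULL, filt_kqdetach, filt_kqueue };

/* New style (as in the hunks below): order-independent and self-documenting. */
static struct filterops kqread_filtops = {
	.f_isfd   = 1,
	.f_detach = filt_kqdetach,
	.f_event  = filt_kqueue,
};

With the designated form, any member not named (f_attach here) is zero-initialized, which is why the converted initializers in the diff can simply omit unused callbacks.
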
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 23170bb6..e694f479 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -57,11 +57,13 @@ __FBSDID("$FreeBSD$"); #include <sys/jail.h> #include <sys/ktr.h> #include <rtems/bsd/sys/lock.h> +#include <sys/loginclass.h> #include <sys/mount.h> #include <sys/mutex.h> #include <sys/syscallsubr.h> #include <sys/sysctl.h> #include <sys/proc.h> +#include <sys/racct.h> #include <sys/resourcevar.h> #include <sys/systm.h> #include <sys/signalvar.h> @@ -103,9 +105,11 @@ struct vmspace vmspace0; struct proc *initproc; int boothowto = 0; /* initialized so that it can be patched */ -SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, boothowto, CTLFLAG_RD, &boothowto, 0, + "Boot control flags, passed from loader"); int bootverbose; -SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, ""); +SYSCTL_INT(_debug, OID_AUTO, bootverbose, CTLFLAG_RW, &bootverbose, 0, + "Control the output of verbose kernel messages"); /* * This ensures that there is at least one entry so that the sysinit_set @@ -160,6 +164,24 @@ sysinit_add(struct sysinit **set, struct sysinit **set_end) } #endif /* __rtems__ */ +#if defined (DDB) && defined(VERBOSE_SYSINIT) +static const char * +symbol_name(vm_offset_t va, db_strategy_t strategy) +{ + const char *name; + c_db_sym_t sym; + db_expr_t offset; + + if (va == 0) + return (NULL); + sym = db_search_symbol(va, strategy, &offset); + if (offset != 0) + return (NULL); + db_symbol_values(sym, &name, NULL); + return (name); +} +#endif + /* * System startup; initialize the world, create process 0, mount root * filesystem, and fork to create init and pagedaemon. Most of the @@ -248,15 +270,16 @@ restart: } if (verbose) { #if defined(DDB) - const char *name; - c_db_sym_t sym; - db_expr_t offset; - - sym = db_search_symbol((vm_offset_t)(*sipp)->func, - DB_STGY_PROC, &offset); - db_symbol_values(sym, &name, NULL); - if (name != NULL) - printf(" %s(%p)... ", name, (*sipp)->udata); + const char *func, *data; + + func = symbol_name((vm_offset_t)(*sipp)->func, + DB_STGY_PROC); + data = symbol_name((vm_offset_t)(*sipp)->udata, + DB_STGY_ANY); + if (func != NULL && data != NULL) + printf(" %s(&%s)... ", func, data); + else if (func != NULL) + printf(" %s(%p)... ", func, (*sipp)->udata); else #endif printf(" %p(%p)... 
", (*sipp)->func, @@ -418,8 +441,9 @@ proc0_init(void *dummy __unused) { #ifndef __rtems__ struct proc *p; - unsigned i; struct thread *td; + vm_paddr_t pageablemem; + int i; GIANT_REQUIRED; p = &proc0; @@ -474,11 +498,14 @@ proc0_init(void *dummy __unused) knlist_init_mtx(&p->p_klist, &p->p_mtx); STAILQ_INIT(&p->p_ktr); p->p_nice = NZERO; + /* pid_max cannot be greater than PID_MAX */ td->td_tid = PID_MAX + 1; + LIST_INSERT_HEAD(TIDHASH(td->td_tid), td, td_hash); td->td_state = TDS_RUNNING; td->td_pri_class = PRI_TIMESHARE; td->td_user_pri = PUSER; td->td_base_user_pri = PUSER; + td->td_lend_user_pri = PRI_MAX; td->td_priority = PVM; td->td_base_pri = PVM; td->td_oncpu = 0; @@ -492,7 +519,7 @@ proc0_init(void *dummy __unused) strncpy(p->p_comm, "kernel", sizeof (p->p_comm)); strncpy(td->td_name, "swapper", sizeof (td->td_name)); - callout_init(&p->p_itcallout, CALLOUT_MPSAFE); + callout_init_mtx(&p->p_itcallout, &p->p_mtx, 0); callout_init_mtx(&p->p_limco, &p->p_mtx, 0); callout_init(&td->td_slpcallout, CALLOUT_MPSAFE); @@ -502,6 +529,7 @@ proc0_init(void *dummy __unused) p->p_ucred->cr_uidinfo = uifind(0); p->p_ucred->cr_ruidinfo = uifind(0); p->p_ucred->cr_prison = &prison0; + p->p_ucred->cr_loginclass = loginclass_find("default"); #ifdef AUDIT audit_cred_kproc0(p->p_ucred); #endif @@ -529,12 +557,21 @@ proc0_init(void *dummy __unused) p->p_limit->pl_rlimit[RLIMIT_NOFILE].rlim_max = maxfiles; p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_cur = p->p_limit->pl_rlimit[RLIMIT_NPROC].rlim_max = maxproc; - i = ptoa(cnt.v_free_count); - p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = i; - p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = i; - p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = i / 3; + p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_cur = dfldsiz; + p->p_limit->pl_rlimit[RLIMIT_DATA].rlim_max = maxdsiz; + p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_cur = dflssiz; + p->p_limit->pl_rlimit[RLIMIT_STACK].rlim_max = maxssiz; + /* Cast to avoid overflow on i386/PAE. */ + pageablemem = ptoa((vm_paddr_t)cnt.v_free_count); + p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_cur = + p->p_limit->pl_rlimit[RLIMIT_RSS].rlim_max = pageablemem; + p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_cur = pageablemem / 3; + p->p_limit->pl_rlimit[RLIMIT_MEMLOCK].rlim_max = pageablemem; p->p_cpulimit = RLIM_INFINITY; + /* Initialize resource accounting structures. */ + racct_create(&p->p_racct); + p->p_stats = pstats_alloc(); /* Allocate a prototype map so we have something to fork. */ @@ -546,14 +583,12 @@ proc0_init(void *dummy __unused) * proc0 is not expected to enter usermode, so there is no special * handling for sv_minuser here, like is done for exec_new_vmspace(). */ - vm_map_init(&vmspace0.vm_map, p->p_sysent->sv_minuser, - p->p_sysent->sv_maxuser); - vmspace0.vm_map.pmap = vmspace_pmap(&vmspace0); - - /*- - * call the init and ctor for the new thread and proc - * we wait to do this until all other structures - * are fairly sane. + vm_map_init(&vmspace0.vm_map, vmspace_pmap(&vmspace0), + p->p_sysent->sv_minuser, p->p_sysent->sv_maxuser); + + /* + * Call the init and ctor for the new thread and proc. We wait + * to do this until all other structures are fairly sane. */ EVENTHANDLER_INVOKE(process_init, p); EVENTHANDLER_INVOKE(thread_init, td); @@ -564,6 +599,9 @@ proc0_init(void *dummy __unused) * Charge root for one process. 
*/ (void)chgproccnt(p->p_ucred->cr_ruidinfo, 1, 0); + PROC_LOCK(p); + racct_add_force(p, RACCT_NPROC, 1); + PROC_UNLOCK(p); #endif /* __rtems__ */ } SYSINIT(p0init, SI_SUB_INTRINSIC, SI_ORDER_FIRST, proc0_init, NULL); @@ -661,7 +699,8 @@ SYSCTL_STRING(_kern, OID_AUTO, init_path, CTLFLAG_RD, init_path, 0, #endif static int init_shutdown_timeout = INIT_SHUTDOWN_TIMEOUT; SYSCTL_INT(_kern, OID_AUTO, init_shutdown_timeout, - CTLFLAG_RW, &init_shutdown_timeout, 0, ""); + CTLFLAG_RW, &init_shutdown_timeout, 0, "Shutdown timeout of init(8). " + "Unused within kernel, but used to control init(8)"); /* * Start the initial user process; try exec'ing each pathname in init_path. @@ -770,7 +809,7 @@ start_init(void *dummy) * Otherwise, return via fork_trampoline() all the way * to user mode as init! */ - if ((error = execve(td, &args)) == 0) { + if ((error = sys_execve(td, &args)) == 0) { mtx_unlock(&Giant); return; } @@ -795,7 +834,8 @@ create_init(const void *udata __unused) struct ucred *newcred, *oldcred; int error; - error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc); + error = fork1(&thread0, RFFDG | RFPROC | RFSTOPPED, 0, &initproc, + NULL, 0); if (error) panic("cannot fork init: %d\n", error); KASSERT(initproc->p_pid == 1, ("create_init: initproc->p_pid != 1")); diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 449bc991..69c47246 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -35,6 +35,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/capability.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> @@ -131,6 +132,8 @@ static struct fileops kqueueops = { .fo_kqfilter = kqueue_kqfilter, .fo_stat = kqueue_stat, .fo_close = kqueue_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, }; #else /* __rtems__ */ static const rtems_filesystem_file_handlers_r kqueueops; @@ -162,17 +165,30 @@ static int filt_user(struct knote *kn, long hint); static void filt_usertouch(struct knote *kn, struct kevent *kev, u_long type); -static struct filterops file_filtops = - { 1, filt_fileattach, NULL, NULL }; -static struct filterops kqread_filtops = - { 1, NULL, filt_kqdetach, filt_kqueue }; +static struct filterops file_filtops = { + .f_isfd = 1, + .f_attach = filt_fileattach, +}; +static struct filterops kqread_filtops = { + .f_isfd = 1, + .f_detach = filt_kqdetach, + .f_event = filt_kqueue, +}; /* XXX - move to kern_proc.c? */ #ifndef __rtems__ -static struct filterops proc_filtops = - { 0, filt_procattach, filt_procdetach, filt_proc }; +static struct filterops proc_filtops = { + .f_isfd = 0, + .f_attach = filt_procattach, + .f_detach = filt_procdetach, + .f_event = filt_proc, +}; #endif /* __rtems__ */ -static struct filterops timer_filtops = - { 0, filt_timerattach, filt_timerdetach, filt_timer }; +static struct filterops timer_filtops = { + .f_isfd = 0, + .f_attach = filt_timerattach, + .f_detach = filt_timerdetach, + .f_event = filt_timer, +}; static struct filterops user_filtops = { .f_attach = filt_userattach, .f_detach = filt_userdetach, @@ -256,8 +272,10 @@ filt_nullattach(struct knote *kn) return (ENXIO); }; -struct filterops null_filtops = - { 0, filt_nullattach, NULL, NULL }; +struct filterops null_filtops = { + .f_isfd = 0, + .f_attach = filt_nullattach, +}; /* XXX - make SYSINIT to add these, and move into respective modules. 
*/ extern struct filterops sig_filtops; @@ -703,13 +721,11 @@ filt_usertouch(struct knote *kn, struct kevent *kev, u_long type) } } -#ifndef __rtems__ -int -kqueue(struct thread *td, struct kqueue_args *uap) -#else /* __rtems__ */ -static int -rtems_bsd_kqueue(struct thread *td, struct kqueue_args *uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_kqueue(struct thread *td, struct kqueue_args *uap) { struct filedesc *fdp; struct kqueue *kq; @@ -721,7 +737,7 @@ rtems_bsd_kqueue(struct thread *td, struct kqueue_args *uap) #else /* __rtems__ */ (void) fdp; #endif /* __rtems__ */ - error = falloc(td, &fp, &fd); + error = falloc(td, &fp, &fd, 0); if (error) goto done2; @@ -761,7 +777,7 @@ kqueue(void) int error; if (td != NULL) { - error = rtems_bsd_kqueue(td, &ua); + error = sys_kqueue(td, &ua); } else { error = ENOMEM; } @@ -784,17 +800,15 @@ struct kevent_args { const struct timespec *timeout; }; #endif -#ifndef __rtems__ -int -kevent(struct thread *td, struct kevent_args *uap) -#else /* __rtems__ */ +#ifdef __rtems__ static int kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct kevent_copyops *k_ops, const struct timespec *timeout); -static int -rtems_bsd_kevent(struct thread *td, struct kevent_args *uap) +static #endif /* __rtems__ */ +int +sys_kevent(struct thread *td, struct kevent_args *uap) { struct timespec ts, *tsp; struct kevent_copyops k_ops = { uap, @@ -864,7 +878,7 @@ kevent(int kq, const struct kevent *changelist, int nchanges, int error; if (td != NULL) { - error = rtems_bsd_kevent(td, &ua); + error = sys_kevent(td, &ua); } else { error = ENOMEM; } @@ -923,7 +937,7 @@ kern_kevent(struct thread *td, int fd, int nchanges, int nevents, struct file *fp; int i, n, nerrors, error; - if ((error = fget(td, fd, &fp)) != 0) + if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto done_norel; @@ -1079,7 +1093,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa findkn: if (fops->f_isfd) { KASSERT(td != NULL, ("td is NULL")); - error = fget(td, kev->ident, &fp); + error = fget(td, kev->ident, CAP_POLL_EVENT, &fp); if (error) goto done; @@ -1348,7 +1362,7 @@ kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, size = kq->kq_knlistsize; while (size <= fd) size += KQEXTENT; - list = malloc(size * sizeof list, M_KQUEUE, mflag); + list = malloc(size * sizeof(*list), M_KQUEUE, mflag); if (list == NULL) return ENOMEM; KQ_LOCK(kq); @@ -1358,13 +1372,13 @@ kqueue_expand(struct kqueue *kq, struct filterops *fops, uintptr_t ident, } else { if (kq->kq_knlist != NULL) { bcopy(kq->kq_knlist, list, - kq->kq_knlistsize * sizeof list); + kq->kq_knlistsize * sizeof(*list)); to_free = kq->kq_knlist; kq->kq_knlist = NULL; } bzero((caddr_t)list + - kq->kq_knlistsize * sizeof list, - (size - kq->kq_knlistsize) * sizeof list); + kq->kq_knlistsize * sizeof(*list), + (size - kq->kq_knlistsize) * sizeof(*list)); kq->kq_knlistsize = size; kq->kq_knlist = list; } @@ -2373,7 +2387,7 @@ kqfd_register(int fd, struct kevent *kev, struct thread *td, int waitok) struct file *fp; int error; - if ((error = fget(td, fd, &fp)) != 0) + if ((error = fget(td, fd, CAP_POST_EVENT, &fp)) != 0) return (error); if ((error = kqueue_acquire(fp, &kq)) != 0) goto noacquire; diff --git a/freebsd/sys/kern/kern_hhook.c b/freebsd/sys/kern/kern_hhook.c index f6c9e73e..21239b24 100644 --- a/freebsd/sys/kern/kern_hhook.c +++ b/freebsd/sys/kern/kern_hhook.c @@ -1,7 +1,7 @@ #include 
<machine/rtems-bsd-kernel-space.h> /*- - * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * Copyright (c) 2010,2013 Lawrence Stewart <lstewart@freebsd.org> * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. * @@ -63,15 +63,20 @@ struct hhook { static MALLOC_DEFINE(M_HHOOK, "hhook", "Helper hooks are linked off hhook_head lists"); LIST_HEAD(hhookheadhead, hhook_head); -VNET_DEFINE(struct hhookheadhead, hhook_head_list); -#define V_hhook_head_list VNET(hhook_head_list) +struct hhookheadhead hhook_head_list; +VNET_DEFINE(struct hhookheadhead, hhook_vhead_list); +#define V_hhook_vhead_list VNET(hhook_vhead_list) static struct mtx hhook_head_list_lock; MTX_SYSINIT(hhookheadlistlock, &hhook_head_list_lock, "hhook_head list lock", MTX_DEF); +/* Protected by hhook_head_list_lock. */ +static uint32_t n_hhookheads; + /* Private function prototypes. */ static void hhook_head_destroy(struct hhook_head *hhh); +void khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags); #define HHHLIST_LOCK() mtx_lock(&hhook_head_list_lock) #define HHHLIST_UNLOCK() mtx_unlock(&hhook_head_list_lock) @@ -166,21 +171,71 @@ hhook_add_hook(struct hhook_head *hhh, struct hookinfo *hki, uint32_t flags) } /* - * Lookup a helper hook point and register a new helper hook function with it. + * Register a helper hook function with a helper hook point (including all + * virtual instances of the hook point if it is virtualised). + * + * The logic is unfortunately far more complex than for + * hhook_remove_hook_lookup() because hhook_add_hook() can call malloc() with + * M_WAITOK and thus we cannot call hhook_add_hook() with the + * hhook_head_list_lock held. + * + * The logic assembles an array of hhook_head structs that correspond to the + * helper hook point being hooked and bumps the refcount on each (all done with + * the hhook_head_list_lock held). The hhook_head_list_lock is then dropped, and + * hhook_add_hook() is called and the refcount dropped for each hhook_head + * struct in the array. */ int hhook_add_hook_lookup(struct hookinfo *hki, uint32_t flags) { - struct hhook_head *hhh; - int error; + struct hhook_head **heads_to_hook, *hhh; + int error, i, n_heads_to_hook; - hhh = hhook_head_get(hki->hook_type, hki->hook_id); +tryagain: + error = i = 0; + /* + * Accessing n_hhookheads without hhook_head_list_lock held opens up a + * race with hhook_head_register() which we are unlikely to lose, but + * nonetheless have to cope with - hence the complex goto logic. + */ + n_heads_to_hook = n_hhookheads; + heads_to_hook = malloc(n_heads_to_hook * sizeof(struct hhook_head *), + M_HHOOK, flags & HHOOK_WAITOK ? M_WAITOK : M_NOWAIT); + if (heads_to_hook == NULL) + return (ENOMEM); - if (hhh == NULL) - return (ENOENT); + HHHLIST_LOCK(); + LIST_FOREACH(hhh, &hhook_head_list, hhh_next) { + if (hhh->hhh_type == hki->hook_type && + hhh->hhh_id == hki->hook_id) { + if (i < n_heads_to_hook) { + heads_to_hook[i] = hhh; + refcount_acquire(&heads_to_hook[i]->hhh_refcount); + i++; + } else { + /* + * We raced with hhook_head_register() which + * inserted a hhook_head that we need to hook + * but did not malloc space for. Abort this run + * and try again. 
+ */ + for (i--; i >= 0; i--) + refcount_release(&heads_to_hook[i]->hhh_refcount); + free(heads_to_hook, M_HHOOK); + HHHLIST_UNLOCK(); + goto tryagain; + } + } + } + HHHLIST_UNLOCK(); - error = hhook_add_hook(hhh, hki, flags); - hhook_head_release(hhh); + for (i--; i >= 0; i--) { + if (!error) + error = hhook_add_hook(heads_to_hook[i], hki, flags); + refcount_release(&heads_to_hook[i]->hhh_refcount); + } + + free(heads_to_hook, M_HHOOK); return (error); } @@ -212,20 +267,21 @@ hhook_remove_hook(struct hhook_head *hhh, struct hookinfo *hki) } /* - * Lookup a helper hook point and remove a helper hook function from it. + * Remove a helper hook function from a helper hook point (including all + * virtual instances of the hook point if it is virtualised). */ int hhook_remove_hook_lookup(struct hookinfo *hki) { struct hhook_head *hhh; - hhh = hhook_head_get(hki->hook_type, hki->hook_id); - - if (hhh == NULL) - return (ENOENT); - - hhook_remove_hook(hhh, hki); - hhook_head_release(hhh); + HHHLIST_LOCK(); + LIST_FOREACH(hhh, &hhook_head_list, hhh_next) { + if (hhh->hhh_type == hki->hook_type && + hhh->hhh_id == hki->hook_id) + hhook_remove_hook(hhh, hki); + } + HHHLIST_UNLOCK(); return (0); } @@ -247,13 +303,6 @@ hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hh return (EEXIST); } - /* XXXLAS: Need to implement support for non-virtualised hooks. */ - if ((flags & HHOOK_HEADISINVNET) == 0) { - printf("%s: only vnet-style virtualised hooks can be used\n", - __func__); - return (EINVAL); - } - tmphhh = malloc(sizeof(struct hhook_head), M_HHOOK, M_ZERO | ((flags & HHOOK_WAITOK) ? M_WAITOK : M_NOWAIT)); @@ -265,22 +314,27 @@ hhook_head_register(int32_t hhook_type, int32_t hhook_id, struct hhook_head **hh tmphhh->hhh_nhooks = 0; STAILQ_INIT(&tmphhh->hhh_hooks); HHH_LOCK_INIT(tmphhh); + refcount_init(&tmphhh->hhh_refcount, 1); - if (hhh != NULL) - refcount_init(&tmphhh->hhh_refcount, 1); - else - refcount_init(&tmphhh->hhh_refcount, 0); - + HHHLIST_LOCK(); if (flags & HHOOK_HEADISINVNET) { tmphhh->hhh_flags |= HHH_ISINVNET; - HHHLIST_LOCK(); - LIST_INSERT_HEAD(&V_hhook_head_list, tmphhh, hhh_next); - HHHLIST_UNLOCK(); - } else { - /* XXXLAS: Add tmphhh to the non-virtualised list. */ +#ifdef VIMAGE + KASSERT(curvnet != NULL, ("curvnet is NULL")); + tmphhh->hhh_vid = (uintptr_t)curvnet; + LIST_INSERT_HEAD(&V_hhook_vhead_list, tmphhh, hhh_vnext); +#endif } + LIST_INSERT_HEAD(&hhook_head_list, tmphhh, hhh_next); + n_hhookheads++; + HHHLIST_UNLOCK(); + + khelp_new_hhook_registered(tmphhh, flags); - *hhh = tmphhh; + if (hhh != NULL) + *hhh = tmphhh; + else + refcount_release(&tmphhh->hhh_refcount); return (0); } @@ -291,14 +345,20 @@ hhook_head_destroy(struct hhook_head *hhh) struct hhook *tmp, *tmp2; HHHLIST_LOCK_ASSERT(); + KASSERT(n_hhookheads > 0, ("n_hhookheads should be > 0")); LIST_REMOVE(hhh, hhh_next); +#ifdef VIMAGE + if (hhook_head_is_virtualised(hhh) == HHOOK_HEADISINVNET) + LIST_REMOVE(hhh, hhh_vnext); +#endif HHH_WLOCK(hhh); STAILQ_FOREACH_SAFE(tmp, &hhh->hhh_hooks, hhk_next, tmp2) free(tmp, M_HHOOK); HHH_WUNLOCK(hhh); HHH_LOCK_DESTROY(hhh); free(hhh, M_HHOOK); + n_hhookheads--; } /* @@ -350,10 +410,17 @@ hhook_head_get(int32_t hhook_type, int32_t hhook_id) { struct hhook_head *hhh; - /* XXXLAS: Pick hhook_head_list based on hhook_head flags. 
*/ HHHLIST_LOCK(); - LIST_FOREACH(hhh, &V_hhook_head_list, hhh_next) { + LIST_FOREACH(hhh, &hhook_head_list, hhh_next) { if (hhh->hhh_type == hhook_type && hhh->hhh_id == hhook_id) { +#ifdef VIMAGE + if (hhook_head_is_virtualised(hhh) == + HHOOK_HEADISINVNET) { + KASSERT(curvnet != NULL, ("curvnet is NULL")); + if (hhh->hhh_vid != (uintptr_t)curvnet) + continue; + } +#endif refcount_acquire(&hhh->hhh_refcount); break; } @@ -415,7 +482,7 @@ static void hhook_vnet_init(const void *unused __unused) { - LIST_INIT(&V_hhook_head_list); + LIST_INIT(&V_hhook_vhead_list); } /* @@ -432,7 +499,7 @@ hhook_vnet_uninit(const void *unused __unused) * subsystem should have already called hhook_head_deregister(). */ HHHLIST_LOCK(); - LIST_FOREACH_SAFE(hhh, &V_hhook_head_list, hhh_next, tmphhh) { + LIST_FOREACH_SAFE(hhh, &V_hhook_vhead_list, hhh_vnext, tmphhh) { printf("%s: hhook_head type=%d, id=%d cleanup required\n", __func__, hhh->hhh_type, hhh->hhh_id); hhook_head_destroy(hhh); @@ -442,9 +509,9 @@ hhook_vnet_uninit(const void *unused __unused) /* - * When a vnet is created and being initialised, init the V_hhook_head_list. + * When a vnet is created and being initialised, init the V_hhook_vhead_list. */ -VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, +VNET_SYSINIT(hhook_vnet_init, SI_SUB_MBUF, SI_ORDER_FIRST, hhook_vnet_init, NULL); /* @@ -452,5 +519,5 @@ VNET_SYSINIT(hhook_vnet_init, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, * points to clean up on vnet tear down, but in case the KPI is misused, * provide a function to clean up and free memory for a vnet being destroyed. */ -VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_PROTO_BEGIN, SI_ORDER_FIRST, +VNET_SYSUNINIT(hhook_vnet_uninit, SI_SUB_MBUF, SI_ORDER_ANY, hhook_vnet_uninit, NULL); diff --git a/freebsd/sys/kern/kern_intr.c b/freebsd/sys/kern/kern_intr.c index 2fd12773..b8074022 100644 --- a/freebsd/sys/kern/kern_intr.c +++ b/freebsd/sys/kern/kern_intr.c @@ -84,6 +84,7 @@ struct intr_thread { /* Interrupt thread flags kept in it_flags */ #define IT_DEAD 0x000001 /* Thread is waiting to exit. */ +#define IT_WAIT 0x000002 /* Thread is waiting for completion. */ struct intr_entropy { struct thread *td; @@ -576,17 +577,6 @@ intr_event_add_handler(struct intr_event *ie, const char *name, } } - /* Add the new handler to the event in priority order. */ - TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { - if (temp_ih->ih_pri > ih->ih_pri) - break; - } - if (temp_ih == NULL) - TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); - else - TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); - intr_event_update(ie); - /* Create a thread if we need one. */ while (ie->ie_thread == NULL && handler != NULL) { if (ie->ie_flags & IE_ADDING_THREAD) @@ -603,6 +593,18 @@ intr_event_add_handler(struct intr_event *ie, const char *name, wakeup(ie); } } + + /* Add the new handler to the event in priority order. */ + TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { + if (temp_ih->ih_pri > ih->ih_pri) + break; + } + if (temp_ih == NULL) + TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); + else + TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); + intr_event_update(ie); + CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); @@ -650,23 +652,12 @@ intr_event_add_handler(struct intr_event *ie, const char *name, } } - /* Add the new handler to the event in priority order. 
*/ - TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { - if (temp_ih->ih_pri > ih->ih_pri) - break; - } - if (temp_ih == NULL) - TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); - else - TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); - intr_event_update(ie); - /* For filtered handlers, create a private ithread to run on. */ - if (filter != NULL && handler != NULL) { + if (filter != NULL && handler != NULL) { mtx_unlock(&ie->ie_lock); - it = ithread_create("intr: newborn", ih); + it = ithread_create("intr: newborn", ih); mtx_lock(&ie->ie_lock); - it->it_event = ie; + it->it_event = ie; ih->ih_thread = it; ithread_update(it); // XXX - do we really need this?!?!? } else { /* Create the global per-event thread if we need one. */ @@ -686,6 +677,18 @@ intr_event_add_handler(struct intr_event *ie, const char *name, } } } + + /* Add the new handler to the event in priority order. */ + TAILQ_FOREACH(temp_ih, &ie->ie_handlers, ih_next) { + if (temp_ih->ih_pri > ih->ih_pri) + break; + } + if (temp_ih == NULL) + TAILQ_INSERT_TAIL(&ie->ie_handlers, ih, ih_next); + else + TAILQ_INSERT_BEFORE(temp_ih, ih, ih_next); + intr_event_update(ie); + CTR3(KTR_INTR, "%s: added %s to %s", __func__, ih->ih_name, ie->ie_name); mtx_unlock(&ie->ie_lock); @@ -773,7 +776,47 @@ intr_handler_source(void *cookie) return (ie->ie_source); } +/* + * Sleep until an ithread finishes executing an interrupt handler. + * + * XXX Doesn't currently handle interrupt filters or fast interrupt + * handlers. This is intended for compatibility with linux drivers + * only. Do not use in BSD code. + */ +void +_intr_drain(int irq) +{ + struct intr_event *ie; + struct intr_thread *ithd; + struct thread *td; + + ie = intr_lookup(irq); + if (ie == NULL) + return; + if (ie->ie_thread == NULL) + return; + ithd = ie->ie_thread; + td = ithd->it_thread; + /* + * We set the flag and wait for it to be cleared to avoid + * long delays with potentially busy interrupt handlers + * were we to only sample TD_AWAITING_INTR() every tick. + */ + thread_lock(td); + if (!TD_AWAITING_INTR(td)) { + ithd->it_flags |= IT_WAIT; + while (ithd->it_flags & IT_WAIT) { + thread_unlock(td); + pause("idrain", 1); + thread_lock(td); + } + } + thread_unlock(td); + return; +} #endif /* __rtems__ */ + + #ifndef INTR_FILTER #ifndef __rtems__ int @@ -835,7 +878,7 @@ ok: * again and remove this handler if it has already passed * it on the list. */ - ie->ie_thread->it_need = 1; + atomic_store_rel_int(&ie->ie_thread->it_need, 1); } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); thread_unlock(ie->ie_thread->it_thread); @@ -911,7 +954,7 @@ intr_event_schedule_thread(struct intr_event *ie) * running. Then, lock the thread and see if we actually need to * put it on the runqueue. */ - it->it_need = 1; + atomic_store_rel_int(&it->it_need, 1); thread_lock(td); #ifndef __rtems__ if (TD_AWAITING_INTR(td)) { @@ -998,7 +1041,7 @@ ok: * again and remove this handler if it has already passed * it on the list. */ - it->it_need = 1; + atomic_store_rel_int(&it->it_need, 1); } else TAILQ_REMOVE(&ie->ie_handlers, handler, ih_next); thread_unlock(it->it_thread); @@ -1078,7 +1121,7 @@ intr_event_schedule_thread(struct intr_event *ie, struct intr_thread *it) * running. Then, lock the thread and see if we actually need to * put it on the runqueue. 
*/ - it->it_need = 1; + atomic_store_rel_int(&it->it_need, 1); thread_lock(td); if (TD_AWAITING_INTR(td)) { CTR3(KTR_INTR, "%s: schedule pid %d (%s)", __func__, p->p_pid, @@ -1161,11 +1204,21 @@ swi_sched(void *cookie, int flags) { struct intr_handler *ih = (struct intr_handler *)cookie; struct intr_event *ie = ih->ih_event; + struct intr_entropy entropy; int error; CTR3(KTR_INTR, "swi_sched: %s %s need=%d", ie->ie_name, ih->ih_name, ih->ih_need); + if (harvest.swi) { + CTR2(KTR_INTR, "swi_sched: pid %d (%s) gathering entropy", + curproc->p_pid, curthread->td_name); + entropy.event = (uintptr_t)ih; + entropy.td = curthread; + random_harvest(&entropy, sizeof(entropy), 1, 0, + RANDOM_INTERRUPT); + } + /* * Set ih_need for this handler so that if the ithread is already * running it will execute this handler on the next pass. Otherwise, @@ -1267,7 +1320,7 @@ intr_event_execute_handlers(struct proc *p, struct intr_event *ie) * interrupt threads always invoke all of their handlers. */ if (ie->ie_flags & IE_SOFT) { - if (!ih->ih_need) + if (atomic_load_acq_int(&ih->ih_need) == 0) continue; else atomic_store_rel_int(&ih->ih_need, 0); @@ -1345,6 +1398,7 @@ ithread_loop(void *arg) struct intr_event *ie; struct thread *td; struct proc *p; + int wake; td = curthread; #ifndef __rtems__ @@ -1357,6 +1411,7 @@ ithread_loop(void *arg) ("%s: ithread and proc linkage out of sync", __func__)); ie = ithd->it_event; ie->ie_count = 0; + wake = 0; /* * As long as we have interrupts outstanding, go through the @@ -1378,7 +1433,7 @@ ithread_loop(void *arg) * we are running, it will set it_need to note that we * should make another pass. */ - while (ithd->it_need) { + while (atomic_load_acq_int(&ithd->it_need) != 0) { /* * This might need a full read and write barrier * to make sure that this write posts before any @@ -1397,7 +1452,8 @@ ithread_loop(void *arg) * set again, so we have to check it again. */ thread_lock(td); - if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { + if ((atomic_load_acq_int(&ithd->it_need) == 0) && + !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { #ifndef __rtems__ TD_SET_IWAIT(td); ie->ie_count = 0; @@ -1415,7 +1471,15 @@ ithread_loop(void *arg) BSD_ASSERT(sc == RTEMS_SUCCESSFUL); #endif /* __rtems__ */ } + if (ithd->it_flags & IT_WAIT) { + wake = 1; + ithd->it_flags &= ~IT_WAIT; + } thread_unlock(td); + if (wake) { + wakeup(ithd); + wake = 0; + } } } #ifndef __rtems__ @@ -1435,6 +1499,7 @@ int intr_event_handle(struct intr_event *ie, struct trapframe *frame) { struct intr_handler *ih; + struct trapframe *oldframe; struct thread *td; int error, ret, thread; @@ -1454,6 +1519,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) thread = 0; ret = 0; critical_enter(); + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; TAILQ_FOREACH(ih, &ie->ie_handlers, ih_next) { if (ih->ih_filter == NULL) { thread = 1; @@ -1491,6 +1558,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) thread = 1; } } + td->td_intr_frame = oldframe; if (thread) { if (ie->ie_pre_ithread != NULL) @@ -1529,6 +1597,7 @@ ithread_loop(void *arg) struct thread *td; struct proc *p; int priv; + int wake; td = curthread; p = td->td_proc; @@ -1539,6 +1608,7 @@ ithread_loop(void *arg) ("%s: ithread and proc linkage out of sync", __func__)); ie = ithd->it_event; ie->ie_count = 0; + wake = 0; /* * As long as we have interrupts outstanding, go through the @@ -1560,7 +1630,7 @@ ithread_loop(void *arg) * we are running, it will set it_need to note that we * should make another pass. 
*/ - while (ithd->it_need) { + while (atomic_load_acq_int(&ithd->it_need) != 0) { /* * This might need a full read and write barrier * to make sure that this write posts before any @@ -1582,12 +1652,21 @@ ithread_loop(void *arg) * set again, so we have to check it again. */ thread_lock(td); - if (!ithd->it_need && !(ithd->it_flags & IT_DEAD)) { + if ((atomic_load_acq_int(&ithd->it_need) == 0) && + !(ithd->it_flags & (IT_DEAD | IT_WAIT))) { TD_SET_IWAIT(td); ie->ie_count = 0; mi_switch(SW_VOL | SWT_IWAIT, NULL); } + if (ithd->it_flags & IT_WAIT) { + wake = 1; + ithd->it_flags &= ~IT_WAIT; + } thread_unlock(td); + if (wake) { + wakeup(ithd); + wake = 0; + } } } @@ -1682,6 +1761,7 @@ int intr_event_handle(struct intr_event *ie, struct trapframe *frame) { struct intr_thread *ithd; + struct trapframe *oldframe; struct thread *td; int thread; @@ -1694,6 +1774,8 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) td->td_intr_nesting_level++; thread = 0; critical_enter(); + oldframe = td->td_intr_frame; + td->td_intr_frame = frame; thread = intr_filter_loop(ie, frame, &ithd); if (thread & FILTER_HANDLED) { if (ie->ie_post_filter != NULL) @@ -1702,6 +1784,7 @@ intr_event_handle(struct intr_event *ie, struct trapframe *frame) if (ie->ie_pre_ithread != NULL) ie->ie_pre_ithread(ie->ie_source); } + td->td_intr_frame = oldframe; critical_exit(); /* Interrupt storm logic */ @@ -1760,7 +1843,16 @@ db_dump_intrhand(struct intr_handler *ih) break; } db_printf(" "); - db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC); + if (ih->ih_filter != NULL) { + db_printf("[F]"); + db_printsym((uintptr_t)ih->ih_filter, DB_STGY_PROC); + } + if (ih->ih_handler != NULL) { + if (ih->ih_filter != NULL) + db_printf(","); + db_printf("[H]"); + db_printsym((uintptr_t)ih->ih_handler, DB_STGY_PROC); + } db_printf("(%p)", ih->ih_argument); if (ih->ih_need || (ih->ih_flags & (IH_EXCLUSIVE | IH_ENTROPY | IH_DEAD | @@ -1896,8 +1988,7 @@ SYSINIT(start_softintr, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softintr, static int sysctl_intrnames(SYSCTL_HANDLER_ARGS) { - return (sysctl_handle_opaque(oidp, intrnames, eintrnames - intrnames, - req)); + return (sysctl_handle_opaque(oidp, intrnames, sintrnames, req)); } SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD, @@ -1906,8 +1997,7 @@ SYSCTL_PROC(_hw, OID_AUTO, intrnames, CTLTYPE_OPAQUE | CTLFLAG_RD, static int sysctl_intrcnt(SYSCTL_HANDLER_ARGS) { - return (sysctl_handle_opaque(oidp, intrcnt, - (char *)eintrcnt - (char *)intrcnt, req)); + return (sysctl_handle_opaque(oidp, intrcnt, sintrcnt, req)); } SYSCTL_PROC(_hw, OID_AUTO, intrcnt, CTLTYPE_OPAQUE | CTLFLAG_RD, @@ -1921,9 +2011,12 @@ DB_SHOW_COMMAND(intrcnt, db_show_intrcnt) { u_long *i; char *cp; + u_int j; cp = intrnames; - for (i = intrcnt; i != eintrcnt && !db_pager_quit; i++) { + j = 0; + for (i = intrcnt; j < (sintrcnt / sizeof(u_long)) && !db_pager_quit; + i++, j++) { if (*cp == '\0') break; if (*i != 0) diff --git a/freebsd/sys/kern/kern_khelp.c b/freebsd/sys/kern/kern_khelp.c index 9e4127da..e1192ae8 100644 --- a/freebsd/sys/kern/kern_khelp.c +++ b/freebsd/sys/kern/kern_khelp.c @@ -1,7 +1,7 @@ #include <machine/rtems-bsd-kernel-space.h> /*- - * Copyright (c) 2010 Lawrence Stewart <lstewart@freebsd.org> + * Copyright (c) 2010,2013 Lawrence Stewart <lstewart@freebsd.org> * Copyright (c) 2010 The FreeBSD Foundation * All rights reserved. 
* @@ -42,7 +42,6 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/kernel.h> #include <sys/hhook.h> -#include <sys/jail.h> #include <sys/khelp.h> #include <rtems/bsd/sys/lock.h> #include <sys/malloc.h> @@ -54,8 +53,6 @@ __FBSDID("$FreeBSD$"); #include <sys/rwlock.h> #include <sys/systm.h> -#include <net/vnet.h> - static struct rwlock khelp_list_lock; RW_SYSINIT(khelplistlock, &khelp_list_lock, "helper list lock"); @@ -63,6 +60,7 @@ static TAILQ_HEAD(helper_head, helper) helpers = TAILQ_HEAD_INITIALIZER(helpers) /* Private function prototypes. */ static inline void khelp_remove_osd(struct helper *h, struct osd *hosd); +void khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags); #define KHELP_LIST_WLOCK() rw_wlock(&khelp_list_lock) #define KHELP_LIST_WUNLOCK() rw_wunlock(&khelp_list_lock) @@ -76,33 +74,32 @@ khelp_register_helper(struct helper *h) struct helper *tmph; int error, i, inserted; - error = 0; - inserted = 0; + error = inserted = 0; refcount_init(&h->h_refcount, 0); h->h_id = osd_register(OSD_KHELP, NULL, NULL); /* It's only safe to add the hooks after osd_register(). */ - if (h->h_nhooks > 0) { - for (i = 0; i < h->h_nhooks && !error; i++) { - /* We don't require the module to assign hook_helper. */ - h->h_hooks[i].hook_helper = h; - error = khelp_add_hhook(&h->h_hooks[i], HHOOK_NOWAIT); - } - - if (error) { - for (i--; i >= 0; i--) - khelp_remove_hhook(&h->h_hooks[i]); - - osd_deregister(OSD_KHELP, h->h_id); - } + for (i = 0; i < h->h_nhooks && !error; i++) { + /* We don't require the module to assign hook_helper. */ + h->h_hooks[i].hook_helper = h; + error = hhook_add_hook_lookup(&h->h_hooks[i], HHOOK_WAITOK); + if (error) + printf("%s: \"%s\" khelp module unable to " + "hook type %d id %d due to error %d\n", __func__, + h->h_name, h->h_hooks[i].hook_type, + h->h_hooks[i].hook_id, error); } - if (!error) { + if (error) { + for (i--; i >= 0; i--) + hhook_remove_hook_lookup(&h->h_hooks[i]); + osd_deregister(OSD_KHELP, h->h_id); + } else { KHELP_LIST_WLOCK(); /* * Keep list of helpers sorted in descending h_id order. Due to * the way osd_set() works, a sorted list ensures - * init_helper_osd() will operate with improved efficiency. + * khelp_init_osd() will operate with improved efficiency. */ TAILQ_FOREACH(tmph, &helpers, h_next) { if (tmph->h_id < h->h_id) { @@ -126,8 +123,6 @@ khelp_deregister_helper(struct helper *h) struct helper *tmph; int error, i; - error = 0; - KHELP_LIST_WLOCK(); if (h->h_refcount > 0) error = EBUSY; @@ -144,10 +139,8 @@ khelp_deregister_helper(struct helper *h) KHELP_LIST_WUNLOCK(); if (!error) { - if (h->h_nhooks > 0) { - for (i = 0; i < h->h_nhooks; i++) - khelp_remove_hhook(&h->h_hooks[i]); - } + for (i = 0; i < h->h_nhooks; i++) + hhook_remove_hook_lookup(&h->h_hooks[i]); osd_deregister(OSD_KHELP, h->h_id); } @@ -265,28 +258,13 @@ khelp_get_id(char *hname) int khelp_add_hhook(struct hookinfo *hki, uint32_t flags) { - VNET_ITERATOR_DECL(vnet_iter); int error; - error = 0; - /* - * XXXLAS: If a helper is dynamically adding a helper hook function at - * runtime using this function, we should update the helper's h_hooks - * struct member to include the additional hookinfo struct. + * XXXLAS: Should probably include the functionality to update the + * helper's h_hooks struct member. 
*/ - - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - error = hhook_add_hook_lookup(hki, flags); - CURVNET_RESTORE(); -#ifdef VIMAGE - if (error) - break; -#endif - } - VNET_LIST_RUNLOCK_NOSLEEP(); + error = hhook_add_hook_lookup(hki, flags); return (error); } @@ -294,32 +272,47 @@ khelp_add_hhook(struct hookinfo *hki, uint32_t flags) int khelp_remove_hhook(struct hookinfo *hki) { - VNET_ITERATOR_DECL(vnet_iter); int error; - error = 0; - /* - * XXXLAS: If a helper is dynamically removing a helper hook function at - * runtime using this function, we should update the helper's h_hooks - * struct member to remove the defunct hookinfo struct. + * XXXLAS: Should probably include the functionality to update the + * helper's h_hooks struct member. */ - - VNET_LIST_RLOCK_NOSLEEP(); - VNET_FOREACH(vnet_iter) { - CURVNET_SET(vnet_iter); - error = hhook_remove_hook_lookup(hki); - CURVNET_RESTORE(); -#ifdef VIMAGE - if (error) - break; -#endif - } - VNET_LIST_RUNLOCK_NOSLEEP(); + error = hhook_remove_hook_lookup(hki); return (error); } +/* + * Private KPI between hhook and khelp that allows khelp modules to insert hook + * functions into hhook points which register after the modules were loaded. + */ +void +khelp_new_hhook_registered(struct hhook_head *hhh, uint32_t flags) +{ + struct helper *h; + int error, i; + + KHELP_LIST_RLOCK(); + TAILQ_FOREACH(h, &helpers, h_next) { + for (i = 0; i < h->h_nhooks; i++) { + if (hhh->hhh_type != h->h_hooks[i].hook_type || + hhh->hhh_id != h->h_hooks[i].hook_id) + continue; + error = hhook_add_hook(hhh, &h->h_hooks[i], flags); + if (error) { + printf("%s: \"%s\" khelp module unable to " + "hook type %d id %d due to error %d\n", + __func__, h->h_name, + h->h_hooks[i].hook_type, + h->h_hooks[i].hook_id, error); + error = 0; + } + } + } + KHELP_LIST_RUNLOCK(); +} + #ifndef __rtems__ int khelp_modevent(module_t mod, int event_type, void *data) @@ -381,95 +374,3 @@ khelp_modevent(module_t mod, int event_type, void *data) return (error); } #endif /* __rtems__ */ - -/* - * This function is called in two separate situations: - * - * - When the kernel is booting, it is called directly by the SYSINIT framework - * to allow Khelp modules which were compiled into the kernel or loaded by the - * boot loader to insert their non-virtualised hook functions into the kernel. - * - * - When the kernel is booting or a vnet is created, this function is also - * called indirectly through khelp_vnet_init() by the vnet initialisation code. - * In this situation, Khelp modules are able to insert their virtualised hook - * functions into the virtualised hook points in the vnet which is being - * initialised. In the case where the kernel is not compiled with "options - * VIMAGE", this step is still run once at boot, but the hook functions get - * transparently inserted into the standard unvirtualised network stack. - */ -static void -khelp_init(const void *vnet) -{ - struct helper *h; - int error, i, vinit; - int32_t htype, hid; - - error = 0; - vinit = vnet != NULL; - - KHELP_LIST_RLOCK(); - TAILQ_FOREACH(h, &helpers, h_next) { - for (i = 0; i < h->h_nhooks && !error; i++) { - htype = h->h_hooks[i].hook_type; - hid = h->h_hooks[i].hook_id; - - /* - * If we're doing a virtualised init (vinit != 0) and - * the hook point is virtualised, or we're doing a plain - * sysinit at boot and the hook point is not - * virtualised, insert the hook. 
- */ - if ((hhook_head_is_virtualised_lookup(htype, hid) == - HHOOK_HEADISINVNET && vinit) || - (!hhook_head_is_virtualised_lookup(htype, hid) && - !vinit)) { - error = hhook_add_hook_lookup(&h->h_hooks[i], - HHOOK_NOWAIT); - } - } - - if (error) { - /* Remove any helper's hooks we successfully added. */ - for (i--; i >= 0; i--) - hhook_remove_hook_lookup(&h->h_hooks[i]); - - printf("%s: Failed to add hooks for helper \"%s\" (%p)", - __func__, h->h_name, h); - if (vinit) - printf(" to vnet %p.\n", vnet); - else - printf(".\n"); - - error = 0; - } - } - KHELP_LIST_RUNLOCK(); -} - -/* - * Vnet created and being initialised. - */ -static void -khelp_vnet_init(const void *unused __unused) -{ - - khelp_init(TD_TO_VNET(curthread)); -} - - -/* - * As the kernel boots, allow Khelp modules which were compiled into the kernel - * or loaded by the boot loader to insert their non-virtualised hook functions - * into the kernel. - */ -SYSINIT(khelp_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, khelp_init, NULL); - -/* - * When a vnet is created and being initialised, we need to insert the helper - * hook functions for all currently registered Khelp modules into the vnet's - * helper hook points. The hhook KPI provides a mechanism for subsystems which - * export helper hook points to clean up on vnet shutdown, so we don't need a - * VNET_SYSUNINIT for Khelp. - */ -VNET_SYSINIT(khelp_vnet_init, SI_SUB_PROTO_END, SI_ORDER_FIRST, - khelp_vnet_init, NULL); diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index d6975a4a..b1b46d7a 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -67,6 +67,8 @@ __FBSDID("$FreeBSD$"); #ifndef __rtems__ #ifdef KLD_DEBUG int kld_debug = 0; +SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW, + &kld_debug, 0, "Set various levels of KLD debug"); #endif #define KLD_LOCK() sx_xlock(&kld_sx) @@ -743,6 +745,9 @@ linker_file_add_dependency(linker_file_t file, linker_file_t dep) file->deps = newdeps; file->deps[file->ndeps] = dep; file->ndeps++; + KLD_DPF(FILE, ("linker_file_add_dependency:" + " adding %s as dependency for %s\n", + dep->filename, file->filename)); return (0); } @@ -1071,7 +1076,7 @@ done: } int -kldload(struct thread *td, struct kldload_args *uap) +sys_kldload(struct thread *td, struct kldload_args *uap) { char *pathname = NULL; int error, fileid; @@ -1151,14 +1156,14 @@ kern_kldunload(struct thread *td, int fileid, int flags) } int -kldunload(struct thread *td, struct kldunload_args *uap) +sys_kldunload(struct thread *td, struct kldunload_args *uap) { return (kern_kldunload(td, uap->fileid, LINKER_UNLOAD_NORMAL)); } int -kldunloadf(struct thread *td, struct kldunloadf_args *uap) +sys_kldunloadf(struct thread *td, struct kldunloadf_args *uap) { if (uap->flags != LINKER_UNLOAD_NORMAL && @@ -1168,7 +1173,7 @@ kldunloadf(struct thread *td, struct kldunloadf_args *uap) } int -kldfind(struct thread *td, struct kldfind_args *uap) +sys_kldfind(struct thread *td, struct kldfind_args *uap) { char *pathname; const char *filename; @@ -1201,7 +1206,7 @@ out: } int -kldnext(struct thread *td, struct kldnext_args *uap) +sys_kldnext(struct thread *td, struct kldnext_args *uap) { linker_file_t lf; int error = 0; @@ -1238,7 +1243,7 @@ out: } int -kldstat(struct thread *td, struct kldstat_args *uap) +sys_kldstat(struct thread *td, struct kldstat_args *uap) { struct kld_file_stat stat; int error, version; @@ -1300,7 +1305,7 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) } int -kldfirstmod(struct thread *td, 
struct kldfirstmod_args *uap) +sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) { linker_file_t lf; module_t mp; @@ -1329,7 +1334,7 @@ kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) } int -kldsym(struct thread *td, struct kldsym_args *uap) +sys_kldsym(struct thread *td, struct kldsym_args *uap) { char *symstr = NULL; c_linker_sym_t sym; @@ -1631,6 +1636,12 @@ restart: modname = mp->md_cval; verinfo = mp->md_data; mod = modlist_lookup2(modname, verinfo); + if (mod == NULL) { + printf("KLD file %s - cannot find " + "dependency \"%s\"\n", + lf->filename, modname); + goto fail; + } /* Don't count self-dependencies */ if (lf == mod->container) continue; @@ -1647,11 +1658,9 @@ restart: */ error = LINKER_LINK_PRELOAD_FINISH(lf); if (error) { - TAILQ_REMOVE(&depended_files, lf, loaded); printf("KLD file %s - could not finalize loading\n", lf->filename); - linker_file_unload(lf, LINKER_UNLOAD_FORCE); - continue; + goto fail; } linker_file_register_modules(lf); if (linker_file_lookup_set(lf, "sysinit_set", &si_start, @@ -1659,6 +1668,10 @@ restart: sysinit_add(si_start, si_stop); linker_file_register_sysctls(lf); lf->flags |= LINKER_FILE_LINKED; + continue; +fail: + TAILQ_REMOVE(&depended_files, lf, loaded); + linker_file_unload(lf, LINKER_UNLOAD_FORCE); } /* woohoo! we made it! */ } @@ -1765,7 +1778,8 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname, struct vattr vattr, mattr; u_char *hints = NULL; u_char *cp, *recptr, *bufend, *result, *best, *pathbuf, *sep; - int error, ival, bestver, *intp, reclen, found, flags, clen, blen; + int error, ival, bestver, *intp, found, flags, clen, blen; + ssize_t reclen; int vfslocked = 0; result = NULL; @@ -1810,7 +1824,7 @@ linker_hints_lookup(const char *path, int pathlen, const char *modname, VFS_UNLOCK_GIANT(vfslocked); nd.ni_vp = NULL; if (reclen != 0) { - printf("can't read %d\n", reclen); + printf("can't read %zd\n", reclen); goto bad; } intp = (int *)hints; @@ -2184,6 +2198,6 @@ sysctl_kern_function_list(SYSCTL_HANDLER_ARGS) return (SYSCTL_OUT(req, "", 1)); } -SYSCTL_PROC(_kern, OID_AUTO, function_list, CTLFLAG_RD, +SYSCTL_PROC(_kern, OID_AUTO, function_list, CTLTYPE_OPAQUE | CTLFLAG_RD, NULL, 0, sysctl_kern_function_list, "", "kernel function list"); #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c index 5ad62894..98cfb1f0 100644 --- a/freebsd/sys/kern/kern_mbuf.c +++ b/freebsd/sys/kern/kern_mbuf.c @@ -112,14 +112,23 @@ struct mbstat mbstat; static void tunable_mbinit(void *dummy) { - TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); /* This has to be done before VM init. 
*/ + TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) nmbclusters = 1024 + maxusers * 64; - nmbjumbop = nmbclusters / 2; - nmbjumbo9 = nmbjumbop / 2; - nmbjumbo16 = nmbjumbo9 / 2; + + TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); + if (nmbjumbop == 0) + nmbjumbop = nmbclusters / 2; + + TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); + if (nmbjumbo9 == 0) + nmbjumbo9 = nmbclusters / 4; + + TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); + if (nmbjumbo16 == 0) + nmbjumbo16 = nmbclusters / 8; } SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL); @@ -237,7 +246,7 @@ static void mb_zfini_pack(void *, int); static void mb_reclaim(void *); static void mbuf_init(void *); -static void *mbuf_jumbo_alloc(uma_zone_t, int, u_int8_t *, int); +static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); @@ -356,7 +365,7 @@ mbuf_init(void *dummy) * pages. */ static void * -mbuf_jumbo_alloc(uma_zone_t zone, int bytes, u_int8_t *flags, int wait) +mbuf_jumbo_alloc(uma_zone_t zone, int bytes, uint8_t *flags, int wait) { /* Inform UMA that this allocator uses kernel_map/object. */ diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c index 7a364a5e..1450f61c 100644 --- a/freebsd/sys/kern/kern_mib.c +++ b/freebsd/sys/kern/kern_mib.c @@ -59,7 +59,7 @@ __FBSDID("$FreeBSD$"); SYSCTL_NODE(, 0, sysctl, CTLFLAG_RW, 0, "Sysctl internal magic"); -SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW, 0, +SYSCTL_NODE(, CTL_KERN, kern, CTLFLAG_RW|CTLFLAG_CAPRD, 0, "High kernel, proc, limits &c"); #ifndef __rtems__ SYSCTL_NODE(, CTL_VM, vm, CTLFLAG_RW, 0, @@ -99,10 +99,10 @@ SYSCTL_NODE(, OID_AUTO, regression, CTLFLAG_RW, 0, SYSCTL_STRING(_kern, OID_AUTO, ident, CTLFLAG_RD|CTLFLAG_MPSAFE, kern_ident, 0, "Kernel identifier"); -SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE, - osrelease, 0, "Operating system release"); +SYSCTL_STRING(_kern, KERN_OSRELEASE, osrelease, CTLFLAG_RD|CTLFLAG_MPSAFE| + CTLFLAG_CAPRD, osrelease, 0, "Operating system release"); -SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_OSREV, osrevision, CTLFLAG_RD|CTLFLAG_CAPRD, 0, BSD, "Operating system revision"); SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE, @@ -111,14 +111,14 @@ SYSCTL_STRING(_kern, KERN_VERSION, version, CTLFLAG_RD|CTLFLAG_MPSAFE, SYSCTL_STRING(_kern, OID_AUTO, compiler_version, CTLFLAG_RD|CTLFLAG_MPSAFE, compiler_version, 0, "Version of compiler used to compile kernel"); -SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE, - ostype, 0, "Operating system type"); +SYSCTL_STRING(_kern, KERN_OSTYPE, ostype, CTLFLAG_RD|CTLFLAG_MPSAFE| + CTLFLAG_CAPRD, ostype, 0, "Operating system type"); /* * NOTICE: The *userland* release date is available in * /usr/include/osreldate.h */ -SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_OSRELDATE, osreldate, CTLFLAG_RD|CTLFLAG_CAPRD, &osreldate, 0, "Kernel release date"); SYSCTL_INT(_kern, KERN_MAXPROC, maxproc, CTLFLAG_RDTUN, @@ -130,24 +130,24 @@ SYSCTL_INT(_kern, KERN_MAXPROCPERUID, maxprocperuid, CTLFLAG_RW, SYSCTL_INT(_kern, OID_AUTO, maxusers, CTLFLAG_RDTUN, &maxusers, 0, "Hint for kernel tuning"); -SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_ARGMAX, argmax, CTLFLAG_RD|CTLFLAG_CAPRD, 0, ARG_MAX, "Maximum bytes of argument to 
execve(2)"); -SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_POSIX1, posix1version, CTLFLAG_RD|CTLFLAG_CAPRD, 0, _POSIX_VERSION, "Version of POSIX attempting to comply to"); -SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN, +SYSCTL_INT(_kern, KERN_NGROUPS, ngroups, CTLFLAG_RDTUN|CTLFLAG_CAPRD, &ngroups_max, 0, "Maximum number of supplemental groups a user can belong to"); -SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_JOB_CONTROL, job_control, CTLFLAG_RD|CTLFLAG_CAPRD, 0, 1, "Whether job control is available"); #ifdef _POSIX_SAVED_IDS -SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD, 0, 1, "Whether saved set-group/user ID is available"); #else -SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD, +SYSCTL_INT(_kern, KERN_SAVED_IDS, saved_ids, CTLFLAG_RD|CTLFLAG_CAPRD, 0, 0, "Whether saved set-group/user ID is available"); #endif @@ -156,13 +156,13 @@ char kernelname[MAXPATHLEN] = "/kernel"; /* XXX bloat */ SYSCTL_STRING(_kern, KERN_BOOTFILE, bootfile, CTLFLAG_RW, kernelname, sizeof kernelname, "Name of kernel file booted"); -SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD, +SYSCTL_INT(_hw, HW_NCPU, ncpu, CTLFLAG_RD|CTLFLAG_CAPRD, &mp_ncpus, 0, "Number of active CPUs"); -SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD, +SYSCTL_INT(_hw, HW_BYTEORDER, byteorder, CTLFLAG_RD|CTLFLAG_CAPRD, 0, BYTE_ORDER, "System byte order"); -SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD, +SYSCTL_INT(_hw, HW_PAGESIZE, pagesize, CTLFLAG_RD|CTLFLAG_CAPRD, 0, PAGE_SIZE, "System memory page size"); static int @@ -179,7 +179,7 @@ sysctl_kern_arnd(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_kern, KERN_ARND, arandom, - CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, + CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE | CTLFLAG_CAPRD, NULL, 0, sysctl_kern_arnd, "", "arc4rand"); static int @@ -215,7 +215,7 @@ sysctl_hw_usermem(SYSCTL_HANDLER_ARGS) SYSCTL_PROC(_hw, HW_USERMEM, usermem, CTLTYPE_ULONG | CTLFLAG_RD, 0, 0, sysctl_hw_usermem, "LU", ""); -SYSCTL_ULONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); +SYSCTL_LONG(_hw, OID_AUTO, availpages, CTLFLAG_RD, &physmem, 0, ""); u_long pagesizes[MAXPAGESIZES] = { PAGE_SIZE }; @@ -462,6 +462,8 @@ FEATURE(compat_freebsd7, "Compatible with FreeBSD 7"); * This is really cheating. These actually live in the libc, something * which I'm not quite sure is a good idea anyway, but in order for * getnext and friends to actually work, we define dummies here. + * + * XXXRW: These probably should be CTLFLAG_CAPRD. */ SYSCTL_STRING(_user, USER_CS_PATH, cs_path, CTLFLAG_RD, "", 0, "PATH that finds all the standard utilities"); @@ -511,6 +513,34 @@ SYSCTL_INT(_debug_sizeof, OID_AUTO, vnode, CTLFLAG_RD, SYSCTL_INT(_debug_sizeof, OID_AUTO, proc, CTLFLAG_RD, 0, sizeof(struct proc), "sizeof(struct proc)"); +static int +sysctl_kern_pid_max(SYSCTL_HANDLER_ARGS) +{ + int error, pm; + + pm = pid_max; + error = sysctl_handle_int(oidp, &pm, 0, req); + if (error || !req->newptr) + return (error); + sx_xlock(&proctree_lock); + sx_xlock(&allproc_lock); + + /* + * Only permit the values less then PID_MAX. + * As a safety measure, do not allow to limit the pid_max too much. 
+ */ + if (pm < 300 || pm > PID_MAX) + error = EINVAL; + else + pid_max = pm; + sx_xunlock(&allproc_lock); + sx_xunlock(&proctree_lock); + return (error); +} +SYSCTL_PROC(_kern, OID_AUTO, pid_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_TUN | + CTLFLAG_MPSAFE, 0, 0, sysctl_kern_pid_max, "I", + "Maximum allowed pid"); + #include <sys/bio.h> #include <sys/buf.h> SYSCTL_INT(_debug_sizeof, OID_AUTO, bio, CTLFLAG_RD, diff --git a/freebsd/sys/kern/kern_module.c b/freebsd/sys/kern/kern_module.c index 40e64371..72c9d99d 100644 --- a/freebsd/sys/kern/kern_module.c +++ b/freebsd/sys/kern/kern_module.c @@ -337,7 +337,7 @@ module_file(module_t mod) * Syscalls. */ int -modnext(struct thread *td, struct modnext_args *uap) +sys_modnext(struct thread *td, struct modnext_args *uap) { module_t mod; int error = 0; @@ -368,7 +368,7 @@ done2: } int -modfnext(struct thread *td, struct modfnext_args *uap) +sys_modfnext(struct thread *td, struct modfnext_args *uap) { module_t mod; int error; @@ -398,7 +398,7 @@ struct module_stat_v1 { }; int -modstat(struct thread *td, struct modstat_args *uap) +sys_modstat(struct thread *td, struct modstat_args *uap) { module_t mod; modspecific_t data; @@ -451,7 +451,7 @@ modstat(struct thread *td, struct modstat_args *uap) } int -modfind(struct thread *td, struct modfind_args *uap) +sys_modfind(struct thread *td, struct modfind_args *uap) { int error = 0; char name[MAXMODNAME]; @@ -482,9 +482,9 @@ MODULE_VERSION(kernel, __FreeBSD_version); typedef union modspecific32 { int intval; - u_int32_t uintval; + uint32_t uintval; int longval; - u_int32_t ulongval; + uint32_t ulongval; } modspecific32_t; struct module_stat32 { diff --git a/freebsd/sys/kern/kern_sysctl.c b/freebsd/sys/kern/kern_sysctl.c index 2fb986bb..0903dd74 100644 --- a/freebsd/sys/kern/kern_sysctl.c +++ b/freebsd/sys/kern/kern_sysctl.c @@ -40,12 +40,14 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_capsicum.h> #include <rtems/bsd/local/opt_compat.h> #include <rtems/bsd/local/opt_ktrace.h> #include <rtems/bsd/sys/param.h> #include <sys/fail.h> #include <sys/systm.h> +#include <sys/capability.h> #include <sys/kernel.h> #include <sys/sysctl.h> #include <sys/malloc.h> @@ -367,10 +369,31 @@ sysctl_remove_oid(struct sysctl_oid *oidp, int del, int recurse) return (error); } +int +sysctl_remove_name(struct sysctl_oid *parent, const char *name, + int del, int recurse) +{ + struct sysctl_oid *p, *tmp; + int error; + + error = ENOENT; + SYSCTL_XLOCK(); + SLIST_FOREACH_SAFE(p, SYSCTL_CHILDREN(parent), oid_link, tmp) { + if (strcmp(p->oid_name, name) == 0) { + error = sysctl_remove_oid_locked(p, del, recurse); + break; + } + } + SYSCTL_XUNLOCK(); + + return (error); +} + + static int sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) { - struct sysctl_oid *p; + struct sysctl_oid *p, *tmp; int error; SYSCTL_ASSERT_XLOCKED(); @@ -389,7 +412,8 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) */ if ((oidp->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oidp->oid_refcnt == 1) { - SLIST_FOREACH(p, SYSCTL_CHILDREN(oidp), oid_link) { + SLIST_FOREACH_SAFE(p, + SYSCTL_CHILDREN(oidp), oid_link, tmp) { if (!recurse) return (ENOTEMPTY); error = sysctl_remove_oid_locked(p, del, @@ -430,14 +454,13 @@ sysctl_remove_oid_locked(struct sysctl_oid *oidp, int del, int recurse) } return (0); } - /* * Create new sysctls at run time. * clist may point to a valid context initialized with sysctl_ctx_init(). 
*/ struct sysctl_oid * sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, - int number, const char *name, int kind, void *arg1, int arg2, + int number, const char *name, int kind, void *arg1, intptr_t arg2, int (*handler)(SYSCTL_HANDLER_ARGS), const char *fmt, const char *descr) { struct sysctl_oid *oidp; @@ -475,6 +498,7 @@ sysctl_add_oid(struct sysctl_ctx_list *clist, struct sysctl_oid_list *parent, SYSCTL_CHILDREN_SET(oidp, malloc(sizeof(struct sysctl_oid_list), M_SYSCTLOID, M_WAITOK)); SLIST_INIT(SYSCTL_CHILDREN(oidp)); + oidp->oid_arg2 = arg2; } else { oidp->oid_arg1 = arg1; oidp->oid_arg2 = arg2; @@ -603,8 +627,12 @@ sysctl_sysctl_debug_dump_node(struct sysctl_oid_list *l, int i) } break; case CTLTYPE_INT: printf(" Int\n"); break; + case CTLTYPE_UINT: printf(" u_int\n"); break; + case CTLTYPE_LONG: printf(" Long\n"); break; + case CTLTYPE_ULONG: printf(" u_long\n"); break; case CTLTYPE_STRING: printf(" String\n"); break; - case CTLTYPE_QUAD: printf(" Quad\n"); break; + case CTLTYPE_U64: printf(" uint64_t\n"); break; + case CTLTYPE_S64: printf(" int64_t\n"); break; case CTLTYPE_OPAQUE: printf(" Opaque/struct\n"); break; default: printf("\n"); } @@ -687,7 +715,12 @@ sysctl_sysctl_name(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD, sysctl_sysctl_name, ""); +/* + * XXXRW/JA: Shouldn't return name data for nodes that we don't permit in + * capability mode. + */ +static SYSCTL_NODE(_sysctl, 1, name, CTLFLAG_RD | CTLFLAG_CAPRD, + sysctl_sysctl_name, ""); static int sysctl_sysctl_next_ls(struct sysctl_oid_list *lsp, int *name, u_int namelen, @@ -768,7 +801,12 @@ sysctl_sysctl_next(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD, sysctl_sysctl_next, ""); +/* + * XXXRW/JA: Shouldn't return next data for nodes that we don't permit in + * capability mode. + */ +static SYSCTL_NODE(_sysctl, 2, next, CTLFLAG_RD | CTLFLAG_CAPRD, + sysctl_sysctl_next, ""); static int name2oid(char *name, int *oid, int *len, struct sysctl_oid **oidpp) @@ -813,7 +851,7 @@ static int sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) { char *p; - int error, oid[CTL_MAXNAME], len; + int error, oid[CTL_MAXNAME], len = 0; struct sysctl_oid *op = 0; if (!req->newlen) @@ -830,7 +868,7 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) } p [req->newlen] = '\0'; - len = 0; + SYSCTL_XLOCK(); error = name2oid(p, oid, &len, &op); SYSCTL_XUNLOCK(); @@ -844,8 +882,13 @@ sysctl_sysctl_name2oid(SYSCTL_HANDLER_ARGS) return (error); } -SYSCTL_PROC(_sysctl, 3, name2oid, CTLFLAG_RW|CTLFLAG_ANYBODY|CTLFLAG_MPSAFE, - 0, 0, sysctl_sysctl_name2oid, "I", ""); +/* + * XXXRW/JA: Shouldn't return name2oid data for nodes that we don't permit in + * capability mode. + */ +SYSCTL_PROC(_sysctl, 3, name2oid, + CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_ANYBODY | CTLFLAG_MPSAFE + | CTLFLAG_CAPRW, 0, 0, sysctl_sysctl_name2oid, "I", ""); static int sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) @@ -872,7 +915,7 @@ sysctl_sysctl_oidfmt(SYSCTL_HANDLER_ARGS) } -static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE, +static SYSCTL_NODE(_sysctl, 4, oidfmt, CTLFLAG_RD|CTLFLAG_MPSAFE|CTLFLAG_CAPRD, sysctl_sysctl_oidfmt, ""); static int @@ -896,7 +939,8 @@ sysctl_sysctl_oiddescr(SYSCTL_HANDLER_ARGS) return (error); } -static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD, sysctl_sysctl_oiddescr, ""); +static SYSCTL_NODE(_sysctl, 5, oiddescr, CTLFLAG_RD|CTLFLAG_CAPRD, + sysctl_sysctl_oiddescr, ""); /* * Default "handler" functions. 
@@ -1012,9 +1056,8 @@ sysctl_handle_long(SYSCTL_HANDLER_ARGS) * a variable: point arg1 at it. * a constant: pass it in arg2. */ - int -sysctl_handle_quad(SYSCTL_HANDLER_ARGS) +sysctl_handle_64(SYSCTL_HANDLER_ARGS) { int error = 0; uint64_t tmpout; @@ -1198,7 +1241,7 @@ kernel_sysctl(struct thread *td, int *name, u_int namelen, void *old, req.oldfunc = sysctl_old_kernel; req.newfunc = sysctl_new_kernel; - req.lock = REQ_LOCKED; + req.lock = REQ_UNWIRED; SYSCTL_XLOCK(); error = sysctl_root(0, name, namelen, &req); @@ -1314,7 +1357,7 @@ sysctl_wire_old_buffer(struct sysctl_req *req, size_t len) wiredlen = (len > 0 && len < req->oldlen) ? len : req->oldlen; ret = 0; - if (req->lock == REQ_LOCKED && req->oldptr && + if (req->lock != REQ_WIRED && req->oldptr && req->oldfunc == sysctl_old_user) { if (wiredlen != 0) { ret = vslock(req->oldptr, wiredlen); @@ -1350,8 +1393,6 @@ sysctl_find_oid(int *name, u_int namelen, struct sysctl_oid **noid, return (ENOENT); indx++; - if (oid->oid_kind & CTLFLAG_NOLOCK) - req->lock = REQ_UNLOCKED; if ((oid->oid_kind & CTLTYPE) == CTLTYPE_NODE) { if (oid->oid_handler != NULL || indx == namelen) { *noid = oid; @@ -1410,6 +1451,19 @@ sysctl_root(SYSCTL_HANDLER_ARGS) #ifndef __rtems__ KASSERT(req->td != NULL, ("sysctl_root(): req->td == NULL")); +#ifdef CAPABILITY_MODE + /* + * If the process is in capability mode, then don't permit reading or + * writing unless specifically granted for the node. + */ + if (IN_CAPABILITY_MODE(req->td)) { + if (req->oldptr && !(oid->oid_kind & CTLFLAG_CAPRD)) + return (EPERM); + if (req->newptr && !(oid->oid_kind & CTLFLAG_CAPWR)) + return (EPERM); + } +#endif + /* Is this sysctl sensitive to securelevels? */ if (req->newptr && (oid->oid_kind & CTLFLAG_SECURE)) { lvl = (oid->oid_kind & CTLMASK_SECURE) >> CTLSHIFT_SECURE; @@ -1487,7 +1541,7 @@ struct sysctl_args { }; #endif int -__sysctl(struct thread *td, struct sysctl_args *uap) +sys___sysctl(struct thread *td, struct sysctl_args *uap) { int error, i, name[CTL_MAXNAME]; size_t j; @@ -1555,7 +1609,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, req.oldfunc = sysctl_old_user; req.newfunc = sysctl_new_user; - req.lock = REQ_LOCKED; + req.lock = REQ_UNWIRED; #ifdef KTRACE if (KTRPOINT(curthread, KTR_SYSCTL)) @@ -1577,7 +1631,7 @@ userland_sysctl(struct thread *td, int *name, u_int namelen, void *old, SYSCTL_XUNLOCK(); if (error != EAGAIN) break; - uio_yield(); + kern_yield(PRI_USER); } CURVNET_RESTORE(); diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c index 344c379e..e113aef6 100644 --- a/freebsd/sys/kern/kern_time.c +++ b/freebsd/sys/kern/kern_time.c @@ -178,7 +178,7 @@ struct clock_gettime_args { #ifndef __rtems__ /* ARGSUSED */ int -clock_gettime(struct thread *td, struct clock_gettime_args *uap) +sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap) { struct timespec ats; int error; @@ -265,7 +265,7 @@ struct clock_settime_args { #ifndef __rtems__ /* ARGSUSED */ int -clock_settime(struct thread *td, struct clock_settime_args *uap) +sys_clock_settime(struct thread *td, struct clock_settime_args *uap) { struct timespec ats; int error; @@ -302,7 +302,7 @@ struct clock_getres_args { #endif #ifndef __rtems__ int -clock_getres(struct thread *td, struct clock_getres_args *uap) +sys_clock_getres(struct thread *td, struct clock_getres_args *uap) { struct timespec ts; int error; @@ -407,7 +407,7 @@ struct nanosleep_args { #endif /* ARGSUSED */ int -nanosleep(struct thread *td, struct nanosleep_args *uap) +sys_nanosleep(struct 
thread *td, struct nanosleep_args *uap) { struct timespec rmt, rqt; int error; @@ -438,7 +438,7 @@ struct gettimeofday_args { #endif /* ARGSUSED */ int -gettimeofday(struct thread *td, struct gettimeofday_args *uap) +sys_gettimeofday(struct thread *td, struct gettimeofday_args *uap) { struct timeval atv; struct timezone rtz; @@ -464,7 +464,7 @@ struct settimeofday_args { #endif /* ARGSUSED */ int -settimeofday(struct thread *td, struct settimeofday_args *uap) +sys_settimeofday(struct thread *td, struct settimeofday_args *uap) { struct timeval atv, *tvp; struct timezone atz, *tzp; @@ -536,7 +536,7 @@ struct getitimer_args { }; #endif int -getitimer(struct thread *td, struct getitimer_args *uap) +sys_getitimer(struct thread *td, struct getitimer_args *uap) { struct itimerval aitv; int error; @@ -588,14 +588,14 @@ struct setitimer_args { }; #endif int -setitimer(struct thread *td, struct setitimer_args *uap) +sys_setitimer(struct thread *td, struct setitimer_args *uap) { struct itimerval aitv, oitv; int error; if (uap->itv == NULL) { uap->itv = uap->oitv; - return (getitimer(td, (struct getitimer_args *)uap)); + return (sys_getitimer(td, (struct getitimer_args *)uap)); } if ((error = copyin(uap->itv, &aitv, sizeof(struct itimerval)))) @@ -672,13 +672,11 @@ realitexpire(void *arg) struct timeval ctv, ntv; p = (struct proc *)arg; - PROC_LOCK(p); - psignal(p, SIGALRM); + kern_psignal(p, SIGALRM); if (!timevalisset(&p->p_realtimer.it_interval)) { timevalclear(&p->p_realtimer.it_value); if (p->p_flag & P_WEXIT) wakeup(&p->p_itcallout); - PROC_UNLOCK(p); return; } for (;;) { @@ -690,7 +688,6 @@ realitexpire(void *arg) timevalsub(&ntv, &ctv); callout_reset(&p->p_itcallout, tvtohz(&ntv) - 1, realitexpire, p); - PROC_UNLOCK(p); return; } } @@ -940,7 +937,7 @@ struct ktimer_create_args { }; #endif int -ktimer_create(struct thread *td, struct ktimer_create_args *uap) +sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap) { struct sigevent *evp1, ev; int id; @@ -1079,7 +1076,7 @@ struct ktimer_delete_args { }; #endif int -ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) +sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) { return (kern_timer_delete(td, uap->timerid)); } @@ -1144,7 +1141,7 @@ struct ktimer_settime_args { }; #endif int -ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) +sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) { struct proc *p = td->td_proc; struct itimer *it; @@ -1185,7 +1182,7 @@ struct ktimer_gettime_args { }; #endif int -ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) +sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) { struct proc *p = td->td_proc; struct itimer *it; @@ -1216,7 +1213,7 @@ struct timer_getoverrun_args { }; #endif int -ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap) +sys_ktimer_getoverrun(struct thread *td, struct ktimer_getoverrun_args *uap) { struct proc *p = td->td_proc; struct itimer *it; @@ -1419,28 +1416,22 @@ void itimer_fire(struct itimer *it) { struct proc *p = it->it_proc; - int ret; + struct thread *td; if (it->it_sigev.sigev_notify == SIGEV_SIGNAL || it->it_sigev.sigev_notify == SIGEV_THREAD_ID) { - PROC_LOCK(p); + if (sigev_findtd(p, &it->it_sigev, &td) != 0) { + ITIMER_LOCK(it); + timespecclear(&it->it_time.it_value); + timespecclear(&it->it_time.it_interval); + callout_stop(&it->it_callout); + ITIMER_UNLOCK(it); + return; + } if (!KSI_ONQ(&it->it_ksi)) { it->it_ksi.ksi_errno = 0; - ret = 
psignal_event(p, &it->it_sigev, &it->it_ksi); - if (__predict_false(ret != 0)) { - it->it_overrun++; - /* - * Broken userland code, thread went - * away, disarm the timer. - */ - if (ret == ESRCH) { - ITIMER_LOCK(it); - timespecclear(&it->it_time.it_value); - timespecclear(&it->it_time.it_interval); - callout_stop(&it->it_callout); - ITIMER_UNLOCK(it); - } - } + ksiginfo_set_sigev(&it->it_ksi, &it->it_sigev); + tdsendsignal(p, td, it->it_ksi.ksi_signo, &it->it_ksi); } else { if (it->it_overrun < INT_MAX) it->it_overrun++; diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 38871b03..4ef7909f 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -136,6 +136,7 @@ struct callout_cpu { int cc_softticks; int cc_cancel; int cc_waiting; + int cc_firsttick; }; #ifdef SMP @@ -158,8 +159,9 @@ struct callout_cpu cc_cpu; #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; +void (*callout_new_inserted)(int cpu, int ticks) = NULL; -MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); +static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: @@ -352,8 +354,6 @@ kern_timeout_callwheel_init(void) /* * Start standard softclock thread. */ -void *softclock_ih; - static void start_softclock(void *dummy) { @@ -364,9 +364,8 @@ start_softclock(void *dummy) cc = CC_CPU(timeout_cpu); if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, - INTR_MPSAFE, &softclock_ih)) + INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); - cc->cc_cookie = softclock_ih; #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) @@ -400,7 +399,7 @@ callout_tick(void) need_softclock = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - cc->cc_ticks++; + cc->cc_firsttick = cc->cc_ticks = ticks; for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) { bucket = cc->cc_softticks & callwheelmask; if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) { @@ -417,6 +416,33 @@ callout_tick(void) swi_sched(cc->cc_cookie, 0); } +int +callout_tickstofirst(int limit) +{ + struct callout_cpu *cc; + struct callout *c; + struct callout_tailq *sc; + int curticks; + int skip = 1; + + cc = CC_SELF(); + mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); + curticks = cc->cc_ticks; + while( skip < ncallout && skip < limit ) { + sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; + /* search scanning ticks */ + TAILQ_FOREACH( c, sc, c_links.tqe ){ + if (c->c_time - curticks <= ncallout) + goto out; + } + skip++; + } +out: + cc->cc_firsttick = curticks + skip; + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); + return (skip); +} + static struct callout_cpu * callout_lock(struct callout *c) { @@ -453,24 +479,28 @@ callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks, c->c_arg = arg; c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); c->c_func = func; - c->c_time = cc->cc_ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], + c->c_time = ticks + to_ticks; + TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], c, c_links.tqe); + if ((c->c_time - cc->cc_firsttick) < 0 && + callout_new_inserted != NULL) { + cc->cc_firsttick = c->c_time; + (*callout_new_inserted)(cpu, + to_ticks + (ticks - cc->cc_ticks)); + } } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if (cc->cc_next == c) - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - if (c->c_flags & CALLOUT_LOCAL_ALLOC) { - c->c_func = 
NULL; - SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); - } + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) + return; + c->c_func = NULL; + SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } -static struct callout * +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, int *lockcalls, int *gcalls) { @@ -492,7 +522,9 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, static timeout_t *lastfunc; #endif - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == + (CALLOUT_PENDING | CALLOUT_ACTIVE), + ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; c_lock = c->c_lock; @@ -564,20 +596,7 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, class->lc_unlock(c_lock); skip: CC_LOCK(cc); - /* - * If the current callout is locally allocated (from - * timeout(9)) then put it on the freelist. - * - * Note: we need to check the cached copy of c_flags because - * if it was not local, then it's not safe to deref the - * callout pointer. - */ - if (c_flags & CALLOUT_LOCAL_ALLOC) { - KASSERT(c->c_flags == CALLOUT_LOCAL_ALLOC, - ("corrupted callout")); - c->c_func = NULL; - SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); - } + KASSERT(cc->cc_curr == c, ("mishandled cc_curr")); cc->cc_curr = NULL; if (cc->cc_waiting) { /* @@ -586,13 +605,22 @@ skip: * If the callout was scheduled for * migration just cancel it. */ - if (cc_cme_migrating(cc)) + if (cc_cme_migrating(cc)) { cc_cme_cleanup(cc); + + /* + * It should be assert here that the callout is not + * destroyed but that is not easy. + */ + c->c_flags &= ~CALLOUT_DFRMIGRATION; + } cc->cc_waiting = 0; CC_UNLOCK(cc); wakeup(&cc->cc_waiting); CC_LOCK(cc); } else if (cc_cme_migrating(cc)) { + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, + ("Migrating legacy callout %p", c)); #ifdef SMP /* * If the callout was scheduled for @@ -605,23 +633,20 @@ skip: cc_cme_cleanup(cc); /* - * Handle deferred callout stops + * It should be assert here that the callout is not destroyed + * but that is not easy. + * + * As first thing, handle deferred callout stops. */ if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); - goto nextc; + return; } - c->c_flags &= ~CALLOUT_DFRMIGRATION; - /* - * It should be assert here that the - * callout is not destroyed but that - * is not easy. - */ new_cc = callout_cpu_switch(c, cc, new_cpu); callout_cc_add(c, new_cc, new_ticks, new_func, new_arg, new_cpu); @@ -631,10 +656,19 @@ skip: panic("migration should not happen"); #endif } -#ifdef SMP -nextc: -#endif - return (cc->cc_next); + /* + * If the current callout is locally allocated (from + * timeout(9)) then put it on the freelist. + * + * Note: we need to check the cached copy of c_flags because + * if it was not local, then it's not safe to deref the + * callout pointer. 
+ */ + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || + c->c_flags == CALLOUT_LOCAL_ALLOC, + ("corrupted callout")); + if (c_flags & CALLOUT_LOCAL_ALLOC) + callout_cc_del(c, cc); } /* @@ -701,10 +735,12 @@ softclock(void *arg) steps = 0; } } else { + cc->cc_next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE(bucket, c, c_links.tqe); - c = softclock_call_cc(c, cc, &mpcalls, + softclock_call_cc(c, cc, &mpcalls, &lockcalls, &gcalls); steps = 0; + c = cc->cc_next; } } } @@ -1073,6 +1109,8 @@ again: CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); + if (cc->cc_next == c) + cc->cc_next = TAILQ_NEXT(c, c_links.tqe); TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, c_links.tqe); callout_cc_del(c, cc); diff --git a/freebsd/sys/kern/subr_bus.c b/freebsd/sys/kern/subr_bus.c index 3d1bd2bc..951a63c6 100644 --- a/freebsd/sys/kern/subr_bus.c +++ b/freebsd/sys/kern/subr_bus.c @@ -55,6 +55,8 @@ __FBSDID("$FreeBSD$"); #include <sys/bus.h> #include <sys/interrupt.h> +#include <net/vnet.h> + #include <machine/stdarg.h> #include <vm/uma.h> @@ -124,7 +126,7 @@ struct device { char* desc; /**< driver specific description */ int busy; /**< count of calls to device_busy() */ device_state_t state; /**< current device state */ - u_int32_t devflags; /**< api level flags for device_get_flags() */ + uint32_t devflags; /**< api level flags for device_get_flags() */ u_int flags; /**< internal device flags */ #define DF_ENABLED 0x01 /* device should be probed/attached */ #define DF_FIXEDCLASS 0x02 /* devclass specified at create time */ @@ -235,7 +237,7 @@ devclass_sysctl_init(devclass_t dc) SYSCTL_STATIC_CHILDREN(_dev), OID_AUTO, dc->name, CTLFLAG_RD, NULL, ""); SYSCTL_ADD_PROC(&dc->sysctl_ctx, SYSCTL_CHILDREN(dc->sysctl_tree), - OID_AUTO, "%parent", CTLFLAG_RD, + OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD, dc, DEVCLASS_SYSCTL_PARENT, devclass_sysctl_handler, "A", "parent class"); } @@ -300,23 +302,23 @@ device_sysctl_init(device_t dev) dev->nameunit + strlen(dc->name), CTLFLAG_RD, NULL, ""); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), - OID_AUTO, "%desc", CTLFLAG_RD, + OID_AUTO, "%desc", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_DESC, device_sysctl_handler, "A", "device description"); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), - OID_AUTO, "%driver", CTLFLAG_RD, + OID_AUTO, "%driver", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_DRIVER, device_sysctl_handler, "A", "device driver name"); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), - OID_AUTO, "%location", CTLFLAG_RD, + OID_AUTO, "%location", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_LOCATION, device_sysctl_handler, "A", "device location relative to parent"); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), - OID_AUTO, "%pnpinfo", CTLFLAG_RD, + OID_AUTO, "%pnpinfo", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_PNPINFO, device_sysctl_handler, "A", "device identification"); SYSCTL_ADD_PROC(&dev->sysctl_ctx, SYSCTL_CHILDREN(dev->sysctl_tree), - OID_AUTO, "%parent", CTLFLAG_RD, + OID_AUTO, "%parent", CTLTYPE_STRING | CTLFLAG_RD, dev, DEVICE_SYSCTL_PARENT, device_sysctl_handler, "A", "parent device"); #endif /* __rtems__ */ @@ -605,7 +607,7 @@ devctl_queue_data_f(char *data, int flags) p = devsoftc.async_proc; if (p != NULL) { PROC_LOCK(p); - psignal(p, SIGIO); + kern_psignal(p, SIGIO); PROC_UNLOCK(p); } return; @@ -742,25 +744,7 @@ bad: static void devadded(device_t dev) { - char *pnp = NULL; - char *tmp = NULL; - - pnp = 
malloc(1024, M_BUS, M_NOWAIT); - if (pnp == NULL) - goto fail; - tmp = malloc(1024, M_BUS, M_NOWAIT); - if (tmp == NULL) - goto fail; - *pnp = '\0'; - bus_child_pnpinfo_str(dev, pnp, 1024); - snprintf(tmp, 1024, "%s %s", device_get_nameunit(dev), pnp); - devaddq("+", tmp, dev); -fail: - if (pnp != NULL) - free(pnp, M_BUS); - if (tmp != NULL) - free(tmp, M_BUS); - return; + devaddq("+", device_get_nameunit(dev), dev); } /* @@ -770,25 +754,7 @@ fail: static void devremoved(device_t dev) { - char *pnp = NULL; - char *tmp = NULL; - - pnp = malloc(1024, M_BUS, M_NOWAIT); - if (pnp == NULL) - goto fail; - tmp = malloc(1024, M_BUS, M_NOWAIT); - if (tmp == NULL) - goto fail; - *pnp = '\0'; - bus_child_pnpinfo_str(dev, pnp, 1024); - snprintf(tmp, 1024, "%s %s", device_get_nameunit(dev), pnp); - devaddq("-", tmp, dev); -fail: - if (pnp != NULL) - free(pnp, M_BUS); - if (tmp != NULL) - free(tmp, M_BUS); - return; + devaddq("-", device_get_nameunit(dev), dev); } /* @@ -796,7 +762,7 @@ fail: * the first time that no match happens, so we don't keep getting this * message. Should that prove to be undesirable, we can change it. * This is called when all drivers that can attach to a given bus - * decline to accept this device. Other errrors may not be detected. + * decline to accept this device. Other errors may not be detected. */ static void devnomatch(device_t dev) @@ -1110,7 +1076,7 @@ devclass_driver_added(devclass_t dc, driver_t *driver) * @param dc the devclass to edit * @param driver the driver to register */ -static int +int devclass_add_driver(devclass_t dc, driver_t *driver, int pass, devclass_t *dcp) { driverlink_t dl; @@ -1243,7 +1209,7 @@ devclass_driver_deleted(devclass_t busclass, devclass_t dc, driver_t *driver) * @param dc the devclass to edit * @param driver the driver to unregister */ -static int +int devclass_delete_driver(devclass_t busclass, driver_t *driver) { devclass_t dc = devclass_find(driver->name); @@ -1953,6 +1919,8 @@ device_delete_child(device_t dev, device_t child) return (error); if (child->devclass) devclass_delete_device(child->devclass, child); + if (child->parent) + BUS_CHILD_DELETED(dev, child); TAILQ_REMOVE(&dev->children, child, link); TAILQ_REMOVE(&bus_data_devices, child, devlink); kobj_delete((kobj_t) child, M_BUS); @@ -2350,7 +2318,7 @@ device_get_desc(device_t dev) /** * @brief Return the device's flags */ -u_int32_t +uint32_t device_get_flags(device_t dev) { return (dev->devflags); @@ -2466,7 +2434,7 @@ device_set_desc_copy(device_t dev, const char* desc) * @brief Set the device's flags */ void -device_set_flags(device_t dev, u_int32_t flags) +device_set_flags(device_t dev, uint32_t flags) { dev->devflags = flags; } @@ -2502,6 +2470,35 @@ device_set_softc(device_t dev, void *softc) } /** + * @brief Free claimed softc + * + * Most drivers do not need to use this since the softc is freed + * automatically when the driver is detached. + */ +void +device_free_softc(void *softc) +{ + free(softc, M_BUS_SC); +} + +/** + * @brief Claim softc + * + * This function can be used to let the driver free the automatically + * allocated softc using "device_free_softc()". This function is + * useful when the driver is refcounting the softc and the softc + * cannot be freed when the "device_detach" method is called. 
+ */ +void +device_claim_softc(device_t dev) +{ + if (dev->softc) + dev->flags |= DF_EXTERNALSOFTC; + else + dev->flags &= ~DF_EXTERNALSOFTC; +} + +/** * @brief Get the device's ivars field * * The ivars field is used by the parent device to store per-device @@ -2790,7 +2787,11 @@ device_probe_and_attach(device_t dev) return (0); else if (error != 0) return (error); - return (device_attach(dev)); + + CURVNET_SET_QUIET(vnet0); + error = device_attach(dev); + CURVNET_RESTORE(); + return error; } /** @@ -3061,6 +3062,7 @@ resource_list_add(struct resource_list *rl, int type, int rid, rle->type = type; rle->rid = rid; rle->res = NULL; + rle->flags = 0; } if (rle->res) @@ -3073,6 +3075,58 @@ resource_list_add(struct resource_list *rl, int type, int rid, } /** + * @brief Determine if a resource entry is busy. + * + * Returns true if a resource entry is busy meaning that it has an + * associated resource that is not an unallocated "reserved" resource. + * + * @param rl the resource list to search + * @param type the resource entry type (e.g. SYS_RES_MEMORY) + * @param rid the resource identifier + * + * @returns Non-zero if the entry is busy, zero otherwise. + */ +int +resource_list_busy(struct resource_list *rl, int type, int rid) +{ + struct resource_list_entry *rle; + + rle = resource_list_find(rl, type, rid); + if (rle == NULL || rle->res == NULL) + return (0); + if ((rle->flags & (RLE_RESERVED | RLE_ALLOCATED)) == RLE_RESERVED) { + KASSERT(!(rman_get_flags(rle->res) & RF_ACTIVE), + ("reserved resource is active")); + return (0); + } + return (1); +} + +/** + * @brief Determine if a resource entry is reserved. + * + * Returns true if a resource entry is reserved meaning that it has an + * associated "reserved" resource. The resource can either be + * allocated or unallocated. + * + * @param rl the resource list to search + * @param type the resource entry type (e.g. SYS_RES_MEMORY) + * @param rid the resource identifier + * + * @returns Non-zero if the entry is reserved, zero otherwise. + */ +int +resource_list_reserved(struct resource_list *rl, int type, int rid) +{ + struct resource_list_entry *rle; + + rle = resource_list_find(rl, type, rid); + if (rle != NULL && rle->flags & RLE_RESERVED) + return (1); + return (0); +} + +/** * @brief Find a resource entry by type and rid. * * @param rl the resource list to search @@ -3115,6 +3169,66 @@ resource_list_delete(struct resource_list *rl, int type, int rid) } /** + * @brief Allocate a reserved resource + * + * This can be used by busses to force the allocation of resources + * that are always active in the system even if they are not allocated + * by a driver (e.g. PCI BARs). This function is usually called when + * adding a new child to the bus. The resource is allocated from the + * parent bus when it is reserved. The resource list entry is marked + * with RLE_RESERVED to note that it is a reserved resource. + * + * Subsequent attempts to allocate the resource with + * resource_list_alloc() will succeed the first time and will set + * RLE_ALLOCATED to note that it has been allocated. When a reserved + * resource that has been allocated is released with + * resource_list_release() the resource RLE_ALLOCATED is cleared, but + * the actual resource remains allocated. The resource can be released to + * the parent bus by calling resource_list_unreserve(). 
+ * + * @param rl the resource list to allocate from + * @param bus the parent device of @p child + * @param child the device for which the resource is being reserved + * @param type the type of resource to allocate + * @param rid a pointer to the resource identifier + * @param start hint at the start of the resource range - pass + * @c 0UL for any start address + * @param end hint at the end of the resource range - pass + * @c ~0UL for any end address + * @param count hint at the size of range required - pass @c 1 + * for any size + * @param flags any extra flags to control the resource + * allocation - see @c RF_XXX flags in + * <sys/rman.h> for details + * + * @returns the resource which was allocated or @c NULL if no + * resource could be allocated + */ +struct resource * +resource_list_reserve(struct resource_list *rl, device_t bus, device_t child, + int type, int *rid, u_long start, u_long end, u_long count, u_int flags) +{ + struct resource_list_entry *rle = NULL; + int passthrough = (device_get_parent(child) != bus); + struct resource *r; + + if (passthrough) + panic( + "resource_list_reserve() should only be called for direct children"); + if (flags & RF_ACTIVE) + panic( + "resource_list_reserve() should only reserve inactive resources"); + + r = resource_list_alloc(rl, bus, child, type, rid, start, end, count, + flags); + if (r != NULL) { + rle = resource_list_find(rl, type, *rid); + rle->flags |= RLE_RESERVED; + } + return (r); +} + +/** * @brief Helper function for implementing BUS_ALLOC_RESOURCE() * * Implement BUS_ALLOC_RESOURCE() by looking up a resource from the list @@ -3165,8 +3279,19 @@ resource_list_alloc(struct resource_list *rl, device_t bus, device_t child, if (!rle) return (NULL); /* no resource of that type/rid */ - if (rle->res) + if (rle->res) { + if (rle->flags & RLE_RESERVED) { + if (rle->flags & RLE_ALLOCATED) + return (NULL); + if ((flags & RF_ACTIVE) && + bus_activate_resource(child, type, *rid, + rle->res) != 0) + return (NULL); + rle->flags |= RLE_ALLOCATED; + return (rle->res); + } panic("resource_list_alloc: resource entry is busy"); + } if (isdefault) { start = rle->start; @@ -3198,7 +3323,7 @@ resource_list_alloc(struct resource_list *rl, device_t bus, device_t child, * @param rl the resource list which was allocated from * @param bus the parent device of @p child * @param child the device which is requesting a release - * @param type the type of resource to allocate + * @param type the type of resource to release * @param rid the resource identifier * @param res the resource to release * @@ -3225,6 +3350,19 @@ resource_list_release(struct resource_list *rl, device_t bus, device_t child, panic("resource_list_release: can't find resource"); if (!rle->res) panic("resource_list_release: resource entry is not busy"); + if (rle->flags & RLE_RESERVED) { + if (rle->flags & RLE_ALLOCATED) { + if (rman_get_flags(res) & RF_ACTIVE) { + error = bus_deactivate_resource(child, type, + rid, res); + if (error) + return (error); + } + rle->flags &= ~RLE_ALLOCATED; + return (0); + } + return (EINVAL); + } error = BUS_RELEASE_RESOURCE(device_get_parent(bus), child, type, rid, res); @@ -3236,6 +3374,45 @@ resource_list_release(struct resource_list *rl, device_t bus, device_t child, } /** + * @brief Fully release a reserved resource + * + * Fully releases a resouce reserved via resource_list_reserve(). 
+ * + * @param rl the resource list which was allocated from + * @param bus the parent device of @p child + * @param child the device whose reserved resource is being released + * @param type the type of resource to release + * @param rid the resource identifier + * @param res the resource to release + * + * @retval 0 success + * @retval non-zero a standard unix error code indicating what + * error condition prevented the operation + */ +int +resource_list_unreserve(struct resource_list *rl, device_t bus, device_t child, + int type, int rid) +{ + struct resource_list_entry *rle = NULL; + int passthrough = (device_get_parent(child) != bus); + + if (passthrough) + panic( + "resource_list_unreserve() should only be called for direct children"); + + rle = resource_list_find(rl, type, rid); + + if (!rle) + panic("resource_list_unreserve: can't find resource"); + if (!(rle->flags & RLE_RESERVED)) + return (EINVAL); + if (rle->flags & RLE_ALLOCATED) + return (EBUSY); + rle->flags &= ~RLE_RESERVED; + return (resource_list_release(rl, bus, child, type, rid, rle->res)); +} + +/** * @brief Print a description of resources in a resource list * * Print all resources of a specified type, for use in BUS_PRINT_CHILD(). @@ -3331,7 +3508,7 @@ bus_generic_probe(device_t dev) * on early-pass busses during BUS_NEW_PASS(). */ if (dl->pass > bus_current_pass) - continue; + continue; DEVICE_IDENTIFY(dl->driver, dev); } @@ -3864,6 +4041,10 @@ bus_generic_rl_release_resource(device_t dev, device_t child, int type, { struct resource_list * rl = NULL; + if (device_get_parent(child) != dev) + return (BUS_RELEASE_RESOURCE(device_get_parent(dev), child, + type, rid, r)); + rl = BUS_GET_RESOURCE_LIST(dev, child); if (!rl) return (EINVAL); @@ -3884,6 +4065,10 @@ bus_generic_rl_alloc_resource(device_t dev, device_t child, int type, { struct resource_list * rl = NULL; + if (device_get_parent(child) != dev) + return (BUS_ALLOC_RESOURCE(device_get_parent(dev), child, + type, rid, start, end, count, flags)); + rl = BUS_GET_RESOURCE_LIST(dev, child); if (!rl) return (NULL); @@ -4038,15 +4223,6 @@ bus_setup_intr(device_t dev, struct resource *r, int flags, return (error); if (handler != NULL && !(flags & INTR_MPSAFE)) device_printf(dev, "[GIANT-LOCKED]\n"); - if (bootverbose && (flags & INTR_MPSAFE)) - device_printf(dev, "[MPSAFE]\n"); - if (filter != NULL) { - if (handler == NULL) - device_printf(dev, "[FILTER]\n"); - else - device_printf(dev, "[FILTER+ITHREAD]\n"); - } else - device_printf(dev, "[ITHREAD]\n"); return (0); } diff --git a/freebsd/sys/kern/subr_hash.c b/freebsd/sys/kern/subr_hash.c new file mode 100644 index 00000000..e526a866 --- /dev/null +++ b/freebsd/sys/kern/subr_hash.c @@ -0,0 +1,130 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/malloc.h> + +/* + * General routine to allocate a hash table with control of memory flags. + */ +void * +hashinit_flags(int elements, struct malloc_type *type, u_long *hashmask, + int flags) +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + KASSERT(elements > 0, ("%s: bad elements", __func__)); + /* Exactly one of HASH_WAITOK and HASH_NOWAIT must be set. */ + KASSERT((flags & HASH_WAITOK) ^ (flags & HASH_NOWAIT), + ("Bad flags (0x%x) passed to hashinit_flags", flags)); + + for (hashsize = 1; hashsize <= elements; hashsize <<= 1) + continue; + hashsize >>= 1; + + if (flags & HASH_NOWAIT) + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), + type, M_NOWAIT); + else + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), + type, M_WAITOK); + + if (hashtbl != NULL) { + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *hashmask = hashsize - 1; + } + return (hashtbl); +} + +/* + * Allocate and initialize a hash table with default flag: may sleep. + */ +void * +hashinit(int elements, struct malloc_type *type, u_long *hashmask) +{ + + return (hashinit_flags(elements, type, hashmask, HASH_WAITOK)); +} + +void +hashdestroy(void *vhashtbl, struct malloc_type *type, u_long hashmask) +{ + LIST_HEAD(generic, generic) *hashtbl, *hp; + + hashtbl = vhashtbl; + for (hp = hashtbl; hp <= &hashtbl[hashmask]; hp++) + KASSERT(LIST_EMPTY(hp), ("%s: hash not empty", __func__)); + free(hashtbl, type); +} + +static const int primes[] = { 1, 13, 31, 61, 127, 251, 509, 761, 1021, 1531, + 2039, 2557, 3067, 3583, 4093, 4603, 5119, 5623, 6143, + 6653, 7159, 7673, 8191, 12281, 16381, 24571, 32749 }; +#define NPRIMES (sizeof(primes) / sizeof(primes[0])) + +/* + * General routine to allocate a prime number sized hash table. 
+ */ +void * +phashinit(int elements, struct malloc_type *type, u_long *nentries) +{ + long hashsize; + LIST_HEAD(generic, generic) *hashtbl; + int i; + + KASSERT(elements > 0, ("%s: bad elements", __func__)); + for (i = 1, hashsize = primes[1]; hashsize <= elements;) { + i++; + if (i == NPRIMES) + break; + hashsize = primes[i]; + } + hashsize = primes[i - 1]; + hashtbl = malloc((u_long)hashsize * sizeof(*hashtbl), type, M_WAITOK); + for (i = 0; i < hashsize; i++) + LIST_INIT(&hashtbl[i]); + *nentries = hashsize; + return (hashtbl); +} diff --git a/freebsd/sys/kern/subr_kobj.c b/freebsd/sys/kern/subr_kobj.c index 9cfe868b..5666f274 100644 --- a/freebsd/sys/kern/subr_kobj.c +++ b/freebsd/sys/kern/subr_kobj.c @@ -66,7 +66,7 @@ static int kobj_next_id = 1; #define KOBJ_UNLOCK() mtx_unlock(&kobj_mtx) #define KOBJ_ASSERT(what) mtx_assert(&kobj_mtx, what); -SYSCTL_UINT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD, +SYSCTL_INT(_kern, OID_AUTO, kobj_methodcount, CTLFLAG_RD, &kobj_next_id, 0, ""); static void diff --git a/freebsd/sys/kern/subr_module.c b/freebsd/sys/kern/subr_module.c index 592bd083..f2aa7026 100644 --- a/freebsd/sys/kern/subr_module.c +++ b/freebsd/sys/kern/subr_module.c @@ -37,7 +37,8 @@ __FBSDID("$FreeBSD$"); * Preloaded module support */ -caddr_t preload_metadata; +vm_offset_t preload_addr_relocate = 0; +caddr_t preload_metadata; /* * Search for the preloaded module (name) @@ -46,24 +47,24 @@ caddr_t preload_search_by_name(const char *name) { caddr_t curp; - u_int32_t *hdr; + uint32_t *hdr; int next; if (preload_metadata != NULL) { curp = preload_metadata; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; if (hdr[0] == 0 && hdr[1] == 0) break; /* Search for a MODINFO_NAME field */ if ((hdr[0] == MODINFO_NAME) && - !strcmp(name, curp + sizeof(u_int32_t) * 2)) + !strcmp(name, curp + sizeof(uint32_t) * 2)) return(curp); /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } @@ -78,7 +79,7 @@ caddr_t preload_search_by_type(const char *type) { caddr_t curp, lname; - u_int32_t *hdr; + uint32_t *hdr; int next; if (preload_metadata != NULL) { @@ -86,7 +87,7 @@ preload_search_by_type(const char *type) curp = preload_metadata; lname = NULL; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; if (hdr[0] == 0 && hdr[1] == 0) break; @@ -96,11 +97,11 @@ preload_search_by_type(const char *type) /* Search for a MODINFO_TYPE field */ if ((hdr[0] == MODINFO_TYPE) && - !strcmp(type, curp + sizeof(u_int32_t) * 2)) + !strcmp(type, curp + sizeof(uint32_t) * 2)) return(lname); /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } @@ -115,7 +116,7 @@ caddr_t preload_search_next_name(caddr_t base) { caddr_t curp; - u_int32_t *hdr; + uint32_t *hdr; int next; if (preload_metadata != NULL) { @@ -124,15 +125,15 @@ preload_search_next_name(caddr_t base) if (base) { /* skip to next field */ curp = base; - hdr = (u_int32_t *)curp; - next = sizeof(u_int32_t) * 2 + hdr[1]; + hdr = (uint32_t *)curp; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } else curp = preload_metadata; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; if (hdr[0] == 0 && hdr[1] == 0) break; @@ -141,7 +142,7 @@ preload_search_next_name(caddr_t base) return curp; /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = 
sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } @@ -157,13 +158,13 @@ caddr_t preload_search_info(caddr_t mod, int inf) { caddr_t curp; - u_int32_t *hdr; - u_int32_t type = 0; + uint32_t *hdr; + uint32_t type = 0; int next; curp = mod; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; /* end of module data? */ if (hdr[0] == 0 && hdr[1] == 0) break; @@ -184,10 +185,10 @@ preload_search_info(caddr_t mod, int inf) * data. */ if (hdr[0] == inf) - return(curp + (sizeof(u_int32_t) * 2)); + return(curp + (sizeof(uint32_t) * 2)); /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } @@ -201,7 +202,7 @@ void preload_delete_name(const char *name) { caddr_t curp; - u_int32_t *hdr; + uint32_t *hdr; int next; int clearing; @@ -210,13 +211,13 @@ preload_delete_name(const char *name) clearing = 0; curp = preload_metadata; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; if (hdr[0] == 0 && hdr[1] == 0) break; /* Search for a MODINFO_NAME field */ if (hdr[0] == MODINFO_NAME) { - if (!strcmp(name, curp + sizeof(u_int32_t) * 2)) + if (!strcmp(name, curp + sizeof(uint32_t) * 2)) clearing = 1; /* got it, start clearing */ else if (clearing) clearing = 0; /* at next one now.. better stop */ @@ -225,19 +226,41 @@ preload_delete_name(const char *name) hdr[0] = MODINFO_EMPTY; /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } } } +void * +preload_fetch_addr(caddr_t mod) +{ + caddr_t *mdp; + + mdp = (caddr_t *)preload_search_info(mod, MODINFO_ADDR); + if (mdp == NULL) + return (NULL); + return (*mdp + preload_addr_relocate); +} + +size_t +preload_fetch_size(caddr_t mod) +{ + size_t *mdp; + + mdp = (size_t *)preload_search_info(mod, MODINFO_SIZE); + if (mdp == NULL) + return (0); + return (*mdp); +} + /* Called from locore on i386. Convert physical pointers to kvm. Sigh. 
*/ void preload_bootstrap_relocate(vm_offset_t offset) { caddr_t curp; - u_int32_t *hdr; + uint32_t *hdr; vm_offset_t *ptr; int next; @@ -245,7 +268,7 @@ preload_bootstrap_relocate(vm_offset_t offset) curp = preload_metadata; for (;;) { - hdr = (u_int32_t *)curp; + hdr = (uint32_t *)curp; if (hdr[0] == 0 && hdr[1] == 0) break; @@ -254,14 +277,14 @@ preload_bootstrap_relocate(vm_offset_t offset) case MODINFO_ADDR: case MODINFO_METADATA|MODINFOMD_SSYM: case MODINFO_METADATA|MODINFOMD_ESYM: - ptr = (vm_offset_t *)(curp + (sizeof(u_int32_t) * 2)); + ptr = (vm_offset_t *)(curp + (sizeof(uint32_t) * 2)); *ptr += offset; break; } /* The rest is beyond us for now */ /* skip to next field */ - next = sizeof(u_int32_t) * 2 + hdr[1]; + next = sizeof(uint32_t) * 2 + hdr[1]; next = roundup(next, sizeof(u_long)); curp += next; } diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c index 5480201c..668201a9 100644 --- a/freebsd/sys/kern/subr_rman.c +++ b/freebsd/sys/kern/subr_rman.c @@ -1087,11 +1087,21 @@ found: return (error); } -SYSCTL_NODE(_hw_bus, OID_AUTO, rman, CTLFLAG_RD, sysctl_rman, +static SYSCTL_NODE(_hw_bus, OID_AUTO, rman, CTLFLAG_RD, sysctl_rman, "kernel resource manager"); #ifdef DDB static void +dump_rman_header(struct rman *rm) +{ + + if (db_pager_quit) + return; + db_printf("rman %p: %s (0x%lx-0x%lx full range)\n", + rm, rm->rm_descr, rm->rm_start, rm->rm_end); +} + +static void dump_rman(struct rman *rm) { struct resource_i *r; @@ -1099,8 +1109,6 @@ dump_rman(struct rman *rm) if (db_pager_quit) return; - db_printf("rman: %s\n", rm->rm_descr); - db_printf(" 0x%lx-0x%lx (full range)\n", rm->rm_start, rm->rm_end); TAILQ_FOREACH(r, &rm->rm_list, r_link) { if (r->r_dev != NULL) { devname = device_get_nameunit(r->r_dev); @@ -1121,16 +1129,29 @@ dump_rman(struct rman *rm) DB_SHOW_COMMAND(rman, db_show_rman) { - if (have_addr) + if (have_addr) { + dump_rman_header((struct rman *)addr); dump_rman((struct rman *)addr); + } +} + +DB_SHOW_COMMAND(rmans, db_show_rmans) +{ + struct rman *rm; + + TAILQ_FOREACH(rm, &rman_head, rm_link) { + dump_rman_header(rm); + } } DB_SHOW_ALL_COMMAND(rman, db_show_all_rman) { struct rman *rm; - TAILQ_FOREACH(rm, &rman_head, rm_link) + TAILQ_FOREACH(rm, &rman_head, rm_link) { + dump_rman_header(rm); dump_rman(rm); + } } DB_SHOW_ALIAS(allrman, db_show_all_rman); #endif diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c index a92c09c1..9ea11990 100644 --- a/freebsd/sys/kern/subr_sbuf.c +++ b/freebsd/sys/kern/subr_sbuf.c @@ -52,12 +52,6 @@ __FBSDID("$FreeBSD$"); #include <sys/sbuf.h> -struct sbuf_drain { - sbuf_drain_func *s_func; /* drain function */ - void *s_arg; /* user-supplied drain argument */ - int s_error; /* current error code */ -}; - #ifdef _KERNEL static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); #define SBMALLOC(size) malloc(size, M_SBUF, M_WAITOK) @@ -74,10 +68,10 @@ static MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); #define SBUF_ISDYNAMIC(s) ((s)->s_flags & SBUF_DYNAMIC) #define SBUF_ISDYNSTRUCT(s) ((s)->s_flags & SBUF_DYNSTRUCT) #define SBUF_ISFINISHED(s) ((s)->s_flags & SBUF_FINISHED) -#define SBUF_HASOVERFLOWED(s) ((s)->s_flags & SBUF_OVERFLOWED) #define SBUF_HASROOM(s) ((s)->s_len < (s)->s_size - 1) -#define SBUF_FREESPACE(s) ((s)->s_size - (s)->s_len - 1) +#define SBUF_FREESPACE(s) ((s)->s_size - ((s)->s_len + 1)) #define SBUF_CANEXTEND(s) ((s)->s_flags & SBUF_AUTOEXTEND) +#define SBUF_ISSECTION(s) ((s)->s_flags & SBUF_INSECTION) /* * Set / clear flags @@ -86,8 +80,14 @@ static 
MALLOC_DEFINE(M_SBUF, "sbuf", "string buffers"); #define SBUF_CLEARFLAG(s, f) do { (s)->s_flags &= ~(f); } while (0) #define SBUF_MINEXTENDSIZE 16 /* Should be power of 2. */ + +#ifdef PAGE_SIZE #define SBUF_MAXEXTENDSIZE PAGE_SIZE #define SBUF_MAXEXTENDINCR PAGE_SIZE +#else +#define SBUF_MAXEXTENDSIZE 4096 +#define SBUF_MAXEXTENDINCR 4096 +#endif /* * Debugging support @@ -103,7 +103,8 @@ _assert_sbuf_integrity(const char *fun, struct sbuf *s) KASSERT(s->s_buf != NULL, ("%s called with uninitialized or corrupt sbuf", fun)); KASSERT(s->s_len < s->s_size, - ("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size)); + ("wrote past end of sbuf (%jd >= %jd)", + (intmax_t)s->s_len, (intmax_t)s->s_size)); } static void @@ -146,7 +147,6 @@ sbuf_extendsize(int size) return (newsize); } - /* * Extend an sbuf. */ @@ -162,7 +162,7 @@ sbuf_extend(struct sbuf *s, int addlen) newbuf = SBMALLOC(newsize); if (newbuf == NULL) return (-1); - bcopy(s->s_buf, newbuf, s->s_size); + memcpy(newbuf, s->s_buf, s->s_size); if (SBUF_ISDYNAMIC(s)) SBFREE(s->s_buf); else @@ -173,6 +173,38 @@ sbuf_extend(struct sbuf *s, int addlen) } /* + * Initialize the internals of an sbuf. + * If buf is non-NULL, it points to a static or already-allocated string + * big enough to hold at least length characters. + */ +static struct sbuf * +sbuf_newbuf(struct sbuf *s, char *buf, int length, int flags) +{ + + memset(s, 0, sizeof(*s)); + s->s_flags = flags; + s->s_size = length; + s->s_buf = buf; + + if ((s->s_flags & SBUF_AUTOEXTEND) == 0) { + KASSERT(s->s_size >= 0, + ("attempt to create a too small sbuf")); + } + + if (s->s_buf != NULL) + return (s); + + if ((flags & SBUF_AUTOEXTEND) != 0) + s->s_size = sbuf_extendsize(s->s_size); + + s->s_buf = SBMALLOC(s->s_size); + if (s->s_buf == NULL) + return (NULL); + SBUF_SETFLAG(s, SBUF_DYNAMIC); + return (s); +} + +/* * Initialize an sbuf. * If buf is non-NULL, it points to a static or already-allocated string * big enough to hold at least length characters. @@ -187,31 +219,17 @@ sbuf_new(struct sbuf *s, char *buf, int length, int flags) ("%s called with invalid flags", __func__)); flags &= SBUF_USRFLAGMSK; - if (s == NULL) { - s = SBMALLOC(sizeof(*s)); - if (s == NULL) - return (NULL); - bzero(s, sizeof(*s)); - s->s_flags = flags; - SBUF_SETFLAG(s, SBUF_DYNSTRUCT); - } else { - bzero(s, sizeof(*s)); - s->s_flags = flags; - } - s->s_size = length; - if (buf != NULL) { - s->s_buf = buf; - return (s); - } - if ((flags & SBUF_AUTOEXTEND) != 0) - s->s_size = sbuf_extendsize(s->s_size); - s->s_buf = SBMALLOC(s->s_size); - if (s->s_buf == NULL) { - if (SBUF_ISDYNSTRUCT(s)) - SBFREE(s); + if (s != NULL) + return (sbuf_newbuf(s, buf, length, flags)); + + s = SBMALLOC(sizeof(*s)); + if (s == NULL) + return (NULL); + if (sbuf_newbuf(s, buf, length, flags) == NULL) { + SBFREE(s); return (NULL); } - SBUF_SETFLAG(s, SBUF_DYNAMIC); + SBUF_SETFLAG(s, SBUF_DYNSTRUCT); return (s); } @@ -239,6 +257,8 @@ sbuf_uionew(struct sbuf *s, struct uio *uio, int *error) return (NULL); } s->s_len = s->s_size - 1; + if (SBUF_ISSECTION(s)) + s->s_sect_len = s->s_size - 1; *error = 0; return (s); } @@ -255,10 +275,9 @@ sbuf_clear(struct sbuf *s) /* don't care if it's finished or not */ SBUF_CLEARFLAG(s, SBUF_FINISHED); - SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); - if (s->s_drain != NULL) - s->s_drain->s_error = 0; + s->s_error = 0; s->s_len = 0; + s->s_sect_len = 0; } /* @@ -266,16 +285,19 @@ sbuf_clear(struct sbuf *s) * Effectively truncates the sbuf at the new position. 
*/ int -sbuf_setpos(struct sbuf *s, int pos) +sbuf_setpos(struct sbuf *s, ssize_t pos) { assert_sbuf_integrity(s); assert_sbuf_state(s, 0); KASSERT(pos >= 0, - ("attempt to seek to a negative position (%d)", pos)); + ("attempt to seek to a negative position (%jd)", (intmax_t)pos)); KASSERT(pos < s->s_size, - ("attempt to seek past end of sbuf (%d >= %d)", pos, s->s_size)); + ("attempt to seek past end of sbuf (%jd >= %jd)", + (intmax_t)pos, (intmax_t)s->s_size)); + KASSERT(!SBUF_ISSECTION(s), + ("attempt to seek when in a section")); if (pos < 0 || pos > s->s_len) return (-1); @@ -293,22 +315,10 @@ sbuf_set_drain(struct sbuf *s, sbuf_drain_func *func, void *ctx) assert_sbuf_state(s, 0); assert_sbuf_integrity(s); - KASSERT((s->s_drain != NULL && func == s->s_drain->s_func) || - s->s_len == 0, + KASSERT(func == s->s_drain_func || s->s_len == 0, ("Cannot change drain to %p on non-empty sbuf %p", func, s)); - if (func == NULL) { - SBFREE(s->s_drain); - s->s_drain = NULL; - return; - } - if (s->s_drain == NULL) { - s->s_drain = SBMALLOC(sizeof(*s->s_drain)); - if (s->s_drain == NULL) - return; - } - s->s_drain->s_func = func; - s->s_drain->s_arg = ctx; - s->s_drain->s_error = 0; + s->s_drain_func = func; + s->s_drain_arg = ctx; } /* @@ -320,11 +330,11 @@ sbuf_drain(struct sbuf *s) int len; KASSERT(s->s_len > 0, ("Shouldn't drain empty sbuf %p", s)); - len = s->s_drain->s_func(s->s_drain->s_arg, s->s_buf, s->s_len); + KASSERT(s->s_error == 0, ("Called %s with error on %p", __func__, s)); + len = s->s_drain_func(s->s_drain_arg, s->s_buf, s->s_len); if (len < 0) { - s->s_drain->s_error = -len; - SBUF_SETFLAG(s, SBUF_OVERFLOWED); - return (s->s_drain->s_error); + s->s_error = -len; + return (s->s_error); } KASSERT(len > 0 && len <= s->s_len, ("Bad drain amount %d for sbuf %p", len, s)); @@ -349,39 +359,29 @@ sbuf_drain(struct sbuf *s) * buffer and marking overflow. */ static void -sbuf_put_byte(int c, struct sbuf *s) +sbuf_put_byte(struct sbuf *s, int c) { assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return; if (SBUF_FREESPACE(s) <= 0) { - /* + /* * If there is a drain, use it, otherwise extend the * buffer. */ - if (s->s_drain != NULL) + if (s->s_drain_func != NULL) (void)sbuf_drain(s); else if (sbuf_extend(s, 1) < 0) - SBUF_SETFLAG(s, SBUF_OVERFLOWED); - if (SBUF_HASOVERFLOWED(s)) + s->s_error = ENOMEM; + if (s->s_error != 0) return; } s->s_buf[s->s_len++] = c; -} - -/* - * Append a non-NUL character to an sbuf. This prototype signature is - * suitable for use with kvprintf(9). 
- */ -static void -sbuf_putc_func(int c, void *arg) -{ - - if (c != '\0') - sbuf_put_byte(c, arg); + if (SBUF_ISSECTION(s)) + s->s_sect_len++; } /* @@ -396,13 +396,13 @@ sbuf_bcat(struct sbuf *s, const void *buf, size_t len) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); for (; str < end; str++) { - sbuf_put_byte(*str, s); - if (SBUF_HASOVERFLOWED(s)) + sbuf_put_byte(s, *str); + if (s->s_error != 0) return (-1); - } + } return (0); } @@ -416,10 +416,10 @@ sbuf_bcopyin(struct sbuf *s, const void *uaddr, size_t len) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - KASSERT(s->s_drain == NULL, + KASSERT(s->s_drain_func == NULL, ("Nonsensical copyin to sbuf %p with a drain", s)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); if (len == 0) return (0); @@ -460,12 +460,12 @@ sbuf_cat(struct sbuf *s, const char *str) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); while (*str != '\0') { - sbuf_put_byte(*str++, s); - if (SBUF_HASOVERFLOWED(s)) + sbuf_put_byte(s, *str++); + if (s->s_error != 0) return (-1); } return (0); @@ -482,10 +482,10 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - KASSERT(s->s_drain == NULL, + KASSERT(s->s_drain_func == NULL, ("Nonsensical copyin to sbuf %p with a drain", s)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); if (len == 0) @@ -497,10 +497,12 @@ sbuf_copyin(struct sbuf *s, const void *uaddr, size_t len) } switch (copyinstr(uaddr, s->s_buf + s->s_len, len + 1, &done)) { case ENAMETOOLONG: - SBUF_SETFLAG(s, SBUF_OVERFLOWED); + s->s_error = ENOMEM; /* fall through */ case 0: s->s_len += done - 1; + if (SBUF_ISSECTION(s)) + s->s_sect_len += done - 1; break; default: return (-1); /* XXX */ @@ -528,6 +530,19 @@ sbuf_cpy(struct sbuf *s, const char *str) * Format the given argument list and append the resulting string to an sbuf. */ #if defined(_KERNEL) && !defined(__rtems__) + +/* + * Append a non-NUL character to an sbuf. This prototype signature is + * suitable for use with kvprintf(9). + */ +static void +sbuf_putc_func(int c, void *arg) +{ + + if (c != '\0') + sbuf_put_byte(arg, c); +} + int sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) { @@ -539,7 +554,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) ("%s called with a NULL format string", __func__)); (void)kvprintf(fmt, sbuf_putc_func, s, 10, ap); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); return (0); } @@ -556,7 +571,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) KASSERT(fmt != NULL, ("%s called with a NULL format string", __func__)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); /* @@ -580,7 +595,7 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) if (SBUF_FREESPACE(s) >= len) break; /* Cannot print with the current available space. 
*/ - if (s->s_drain != NULL && s->s_len > 0) + if (s->s_drain_func != NULL && s->s_len > 0) error = sbuf_drain(s); else error = sbuf_extend(s, len - SBUF_FREESPACE(s)); @@ -598,13 +613,15 @@ sbuf_vprintf(struct sbuf *s, const char *fmt, va_list ap) if (SBUF_FREESPACE(s) < len) len = SBUF_FREESPACE(s); s->s_len += len; + if (SBUF_ISSECTION(s)) + s->s_sect_len += len; if (!SBUF_HASROOM(s) && !SBUF_CANEXTEND(s)) - SBUF_SETFLAG(s, SBUF_OVERFLOWED); + s->s_error = ENOMEM; KASSERT(s->s_len < s->s_size, ("wrote past end of sbuf (%d >= %d)", s->s_len, s->s_size)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); return (0); } @@ -632,8 +649,8 @@ int sbuf_putc(struct sbuf *s, int c) { - sbuf_putc_func(c, s); - if (SBUF_HASOVERFLOWED(s)) + sbuf_put_byte(s, c); + if (s->s_error != 0) return (-1); return (0); } @@ -647,26 +664,29 @@ sbuf_trim(struct sbuf *s) assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - KASSERT(s->s_drain == NULL, + KASSERT(s->s_drain_func == NULL, ("%s makes no sense on sbuf %p with drain", __func__, s)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); - while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1])) + while (s->s_len > 0 && isspace(s->s_buf[s->s_len-1])) { --s->s_len; + if (SBUF_ISSECTION(s)) + s->s_sect_len--; + } return (0); } /* - * Check if an sbuf overflowed + * Check if an sbuf has an error. */ int -sbuf_overflowed(struct sbuf *s) +sbuf_error(const struct sbuf *s) { - return (SBUF_HASOVERFLOWED(s)); + return (s->s_error); } /* @@ -675,28 +695,23 @@ sbuf_overflowed(struct sbuf *s) int sbuf_finish(struct sbuf *s) { - int error = 0; assert_sbuf_integrity(s); assert_sbuf_state(s, 0); - if (s->s_drain != NULL) { - error = s->s_drain->s_error; - while (s->s_len > 0 && error == 0) - error = sbuf_drain(s); - } else if (SBUF_HASOVERFLOWED(s)) - error = ENOMEM; + if (s->s_drain_func != NULL) { + while (s->s_len > 0 && s->s_error == 0) + s->s_error = sbuf_drain(s); + } s->s_buf[s->s_len] = '\0'; - SBUF_CLEARFLAG(s, SBUF_OVERFLOWED); SBUF_SETFLAG(s, SBUF_FINISHED); #ifdef _KERNEL - return (error); + return (s->s_error); #else - /*XXX*/if (error) { - errno = error; + errno = s->s_error; + if (s->s_error) return (-1); - } else - return (0); + return (0); #endif } @@ -709,7 +724,7 @@ sbuf_data(struct sbuf *s) assert_sbuf_integrity(s); assert_sbuf_state(s, SBUF_FINISHED); - KASSERT(s->s_drain == NULL, + KASSERT(s->s_drain_func == NULL, ("%s makes no sense on sbuf %p with drain", __func__, s)); return (s->s_buf); @@ -718,16 +733,16 @@ sbuf_data(struct sbuf *s) /* * Return the length of the sbuf data. */ -int +ssize_t sbuf_len(struct sbuf *s) { assert_sbuf_integrity(s); /* don't care if it's finished or not */ - KASSERT(s->s_drain == NULL, + KASSERT(s->s_drain_func == NULL, ("%s makes no sense on sbuf %p with drain", __func__, s)); - if (SBUF_HASOVERFLOWED(s)) + if (s->s_error != 0) return (-1); return (s->s_len); } @@ -745,10 +760,8 @@ sbuf_delete(struct sbuf *s) if (SBUF_ISDYNAMIC(s)) SBFREE(s->s_buf); - if (s->s_drain != NULL) - SBFREE(s->s_drain); isdyn = SBUF_ISDYNSTRUCT(s); - bzero(s, sizeof(*s)); + memset(s, 0, sizeof(*s)); if (isdyn) SBFREE(s); } @@ -757,8 +770,63 @@ sbuf_delete(struct sbuf *s) * Check if an sbuf has been finished. */ int -sbuf_done(struct sbuf *s) +sbuf_done(const struct sbuf *s) { return (SBUF_ISFINISHED(s)); } + +/* + * Start a section. 
+ */ +void +sbuf_start_section(struct sbuf *s, ssize_t *old_lenp) +{ + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + + if (!SBUF_ISSECTION(s)) { + KASSERT(s->s_sect_len == 0, + ("s_sect_len != 0 when starting a section")); + if (old_lenp != NULL) + *old_lenp = -1; + SBUF_SETFLAG(s, SBUF_INSECTION); + } else { + KASSERT(old_lenp != NULL, + ("s_sect_len should be saved when starting a subsection")); + *old_lenp = s->s_sect_len; + s->s_sect_len = 0; + } +} + +/* + * End the section padding to the specified length with the specified + * character. + */ +ssize_t +sbuf_end_section(struct sbuf *s, ssize_t old_len, size_t pad, int c) +{ + ssize_t len; + + assert_sbuf_integrity(s); + assert_sbuf_state(s, 0); + KASSERT(SBUF_ISSECTION(s), + ("attempt to end a section when not in a section")); + + if (pad > 1) { + len = roundup(s->s_sect_len, pad) - s->s_sect_len; + for (; s->s_error == 0 && len > 0; len--) + sbuf_put_byte(s, c); + } + len = s->s_sect_len; + if (old_len == -1) { + s->s_sect_len = 0; + SBUF_CLEARFLAG(s, SBUF_INSECTION); + } else { + s->s_sect_len += old_len; + } + if (s->s_error != 0) + return (-1); + return (len); +} diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c index be19c5b4..867b0e6b 100644 --- a/freebsd/sys/kern/subr_taskqueue.c +++ b/freebsd/sys/kern/subr_taskqueue.c @@ -68,34 +68,52 @@ struct taskqueue { int tq_spin; #endif /* __rtems__ */ int tq_flags; + int tq_callouts; }; #define TQ_FLAGS_ACTIVE (1 << 0) #define TQ_FLAGS_BLOCKED (1 << 1) #define TQ_FLAGS_PENDING (1 << 2) -static void taskqueue_run_locked(struct taskqueue *); +#define DT_CALLOUT_ARMED (1 << 0) -static __inline void -TQ_LOCK(struct taskqueue *tq) -{ #ifndef __rtems__ - if (tq->tq_spin) - mtx_lock_spin(&tq->tq_mutex); - else +#define TQ_LOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_lock_spin(&(tq)->tq_mutex); \ + else \ + mtx_lock(&(tq)->tq_mutex); \ + } while (0) + +#define TQ_UNLOCK(tq) \ + do { \ + if ((tq)->tq_spin) \ + mtx_unlock_spin(&(tq)->tq_mutex); \ + else \ + mtx_unlock(&(tq)->tq_mutex); \ + } while (0) +#else /* __rtems__ */ +#define TQ_LOCK(tq) \ + do { \ + mtx_lock(&(tq)->tq_mutex); \ + } while (0) + +#define TQ_UNLOCK(tq) \ + do { \ + mtx_unlock(&(tq)->tq_mutex); \ + } while (0) #endif /* __rtems__ */ - mtx_lock(&tq->tq_mutex); -} -static __inline void -TQ_UNLOCK(struct taskqueue *tq) +void +_timeout_task_init(struct taskqueue *queue, struct timeout_task *timeout_task, + int priority, task_fn_t func, void *context) { -#ifndef __rtems__ - if (tq->tq_spin) - mtx_unlock_spin(&tq->tq_mutex); - else -#endif /* __rtems__ */ - mtx_unlock(&tq->tq_mutex); + + TASK_INIT(&timeout_task->t, priority, func, context); + callout_init_mtx(&timeout_task->c, &queue->tq_mutex, 0); + timeout_task->q = queue; + timeout_task->f = 0; } static __inline int @@ -153,7 +171,7 @@ static void taskqueue_terminate(struct thread **pp, struct taskqueue *tq) { - while (tq->tq_tcount > 0) { + while (tq->tq_tcount > 0 || tq->tq_callouts > 0) { wakeup(tq); TQ_SLEEP(tq, pp, &tq->tq_mutex, PWAIT, "taskqueue_destroy", 0); } @@ -165,30 +183,27 @@ taskqueue_free(struct taskqueue *queue) TQ_LOCK(queue); queue->tq_flags &= ~TQ_FLAGS_ACTIVE; - taskqueue_run_locked(queue); taskqueue_terminate(queue->tq_threads, queue); KASSERT(TAILQ_EMPTY(&queue->tq_active), ("Tasks still running?")); + KASSERT(queue->tq_callouts == 0, ("Armed timeout tasks")); mtx_destroy(&queue->tq_mutex); free(queue->tq_threads, M_TASKQUEUE); free(queue, M_TASKQUEUE); } -int -taskqueue_enqueue(struct taskqueue *queue, 
struct task *task) +static int +taskqueue_enqueue_locked(struct taskqueue *queue, struct task *task) { struct task *ins; struct task *prev; - TQ_LOCK(queue); - /* * Count multiple enqueues. */ if (task->ta_pending) { if (task->ta_pending < USHRT_MAX) task->ta_pending++; - TQ_UNLOCK(queue); - return 0; + return (0); } /* @@ -216,9 +231,64 @@ taskqueue_enqueue(struct taskqueue *queue, struct task *task) else queue->tq_flags |= TQ_FLAGS_PENDING; + return (0); +} +int +taskqueue_enqueue(struct taskqueue *queue, struct task *task) +{ + int res; + + TQ_LOCK(queue); + res = taskqueue_enqueue_locked(queue, task); TQ_UNLOCK(queue); - return 0; + return (res); +} + +static void +taskqueue_timeout_func(void *arg) +{ + struct taskqueue *queue; + struct timeout_task *timeout_task; + + timeout_task = arg; + queue = timeout_task->q; + KASSERT((timeout_task->f & DT_CALLOUT_ARMED) != 0, ("Stray timeout")); + timeout_task->f &= ~DT_CALLOUT_ARMED; + queue->tq_callouts--; + taskqueue_enqueue_locked(timeout_task->q, &timeout_task->t); +} + +int +taskqueue_enqueue_timeout(struct taskqueue *queue, + struct timeout_task *timeout_task, int ticks) +{ + int res; + + TQ_LOCK(queue); + KASSERT(timeout_task->q == NULL || timeout_task->q == queue, + ("Migrated queue")); + KASSERT(!queue->tq_spin, ("Timeout for spin-queue")); + timeout_task->q = queue; + res = timeout_task->t.ta_pending; + if (ticks == 0) { + taskqueue_enqueue_locked(queue, &timeout_task->t); + } else { + if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) { + res++; + } else { + queue->tq_callouts++; + timeout_task->f |= DT_CALLOUT_ARMED; + if (ticks < 0) + ticks = -ticks; /* Ignore overflow. */ + } + if (ticks > 0) { + callout_reset(&timeout_task->c, ticks, + taskqueue_timeout_func, timeout_task); + } + } + TQ_UNLOCK(queue); + return (res); } void @@ -297,26 +367,76 @@ task_is_running(struct taskqueue *queue, struct task *task) return (0); } +static int +taskqueue_cancel_locked(struct taskqueue *queue, struct task *task, + u_int *pendp) +{ + + if (task->ta_pending > 0) + STAILQ_REMOVE(&queue->tq_queue, task, task, ta_link); + if (pendp != NULL) + *pendp = task->ta_pending; + task->ta_pending = 0; + return (task_is_running(queue, task) ? 
EBUSY : 0); +} + +int +taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp) +{ + u_int pending; + int error; + + TQ_LOCK(queue); + pending = task->ta_pending; + error = taskqueue_cancel_locked(queue, task, pendp); + TQ_UNLOCK(queue); + + return (error); +} + +int +taskqueue_cancel_timeout(struct taskqueue *queue, + struct timeout_task *timeout_task, u_int *pendp) +{ + u_int pending, pending1; + int error; + + TQ_LOCK(queue); + pending = !!callout_stop(&timeout_task->c); + error = taskqueue_cancel_locked(queue, &timeout_task->t, &pending1); + if ((timeout_task->f & DT_CALLOUT_ARMED) != 0) { + timeout_task->f &= ~DT_CALLOUT_ARMED; + queue->tq_callouts--; + } + TQ_UNLOCK(queue); + + if (pendp != NULL) + *pendp = pending + pending1; + return (error); +} + void taskqueue_drain(struct taskqueue *queue, struct task *task) { -#ifndef __rtems__ - if (queue->tq_spin) { /* XXX */ - mtx_lock_spin(&queue->tq_mutex); - while (task->ta_pending != 0 || task_is_running(queue, task)) - msleep_spin(task, &queue->tq_mutex, "-", 0); - mtx_unlock_spin(&queue->tq_mutex); - } else { -#endif /* __rtems__ */ - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); - mtx_lock(&queue->tq_mutex); - while (task->ta_pending != 0 || task_is_running(queue, task)) - msleep(task, &queue->tq_mutex, PWAIT, "-", 0); - mtx_unlock(&queue->tq_mutex); #ifndef __rtems__ - } + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); #endif /* __rtems__ */ + + TQ_LOCK(queue); + while (task->ta_pending != 0 || task_is_running(queue, task)) + TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0); + TQ_UNLOCK(queue); +} + +void +taskqueue_drain_timeout(struct taskqueue *queue, + struct timeout_task *timeout_task) +{ + + callout_drain(&timeout_task->c); + taskqueue_drain(queue, &timeout_task->t); } static void @@ -423,6 +543,7 @@ taskqueue_thread_loop(void *arg) break; TQ_SLEEP(tq, tq, &tq->tq_mutex, 0, "-", 0); } + taskqueue_run_locked(tq); /* rendezvous with thread that asked us to terminate */ tq->tq_tcount--; diff --git a/freebsd/sys/kern/subr_uio.c b/freebsd/sys/kern/subr_uio.c new file mode 100644 index 00000000..74f01ffa --- /dev/null +++ b/freebsd/sys/kern/subr_uio.c @@ -0,0 +1,629 @@ +#include <machine/rtems-bsd-kernel-space.h> + +/*- + * Copyright (c) 1982, 1986, 1991, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94 + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <rtems/bsd/local/opt_zero.h> + +#include <rtems/bsd/sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/limits.h> +#include <rtems/bsd/sys/lock.h> +#include <sys/mman.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/resourcevar.h> +#include <sys/sched.h> +#include <sys/sysctl.h> +#include <sys/vnode.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_extern.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#ifdef ZERO_COPY_SOCKETS +#include <vm/vm_object.h> +#endif + +#ifndef __rtems__ +SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV, + "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)"); +#endif /* __rtems__ */ + +static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault); + +#ifndef __rtems__ +#ifdef ZERO_COPY_SOCKETS +/* Declared in uipc_socket.c */ +extern int so_zero_copy_receive; + +/* + * Identify the physical page mapped at the given kernel virtual + * address. Insert this physical page into the given address space at + * the given virtual address, replacing the physical page, if any, + * that already exists there. + */ +static int +vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr) +{ + vm_map_t map = mapa; + vm_page_t kern_pg, user_pg; + vm_object_t uobject; + vm_map_entry_t entry; + vm_pindex_t upindex; + vm_prot_t prot; + boolean_t wired; + + KASSERT((uaddr & PAGE_MASK) == 0, + ("vm_pgmoveco: uaddr is not page aligned")); + + /* + * Herein the physical page is validated and dirtied. It is + * unwired in sf_buf_mext(). + */ + kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr)); + kern_pg->valid = VM_PAGE_BITS_ALL; + KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1, + ("vm_pgmoveco: kern_pg is not correctly wired")); + + if ((vm_map_lookup(&map, uaddr, + VM_PROT_WRITE, &entry, &uobject, + &upindex, &prot, &wired)) != KERN_SUCCESS) { + return(EFAULT); + } + VM_OBJECT_LOCK(uobject); +retry: + if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) { + if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco")) + goto retry; + vm_page_lock(user_pg); + pmap_remove_all(user_pg); + vm_page_free(user_pg); + vm_page_unlock(user_pg); + } else { + /* + * Even if a physical page does not exist in the + * object chain's first object, a physical page from a + * backing object may be mapped read only. 
+ */ + if (uobject->backing_object != NULL) + pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE); + } + vm_page_insert(kern_pg, uobject, upindex); + vm_page_dirty(kern_pg); + VM_OBJECT_UNLOCK(uobject); + vm_map_lookup_done(map, entry); + return(KERN_SUCCESS); +} +#endif /* ZERO_COPY_SOCKETS */ + +int +copyin_nofault(const void *udaddr, void *kaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyin(udaddr, kaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} + +int +copyout_nofault(const void *kaddr, void *udaddr, size_t len) +{ + int error, save; + + save = vm_fault_disable_pagefaults(); + error = copyout(kaddr, udaddr, len); + vm_fault_enable_pagefaults(save); + return (error); +} + +#define PHYS_PAGE_COUNT(len) (howmany(len, PAGE_SIZE) + 1) + +int +physcopyin(void *src, vm_paddr_t dst, size_t len) +{ + vm_page_t m[PHYS_PAGE_COUNT(len)]; + struct iovec iov[1]; + struct uio uio; + int i; + + iov[0].iov_base = src; + iov[0].iov_len = len; + uio.uio_iov = iov; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_resid = len; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_WRITE; + for (i = 0; i < PHYS_PAGE_COUNT(len); i++, dst += PAGE_SIZE) + m[i] = PHYS_TO_VM_PAGE(dst); + return (uiomove_fromphys(m, dst & PAGE_MASK, len, &uio)); +} + +int +physcopyout(vm_paddr_t src, void *dst, size_t len) +{ + vm_page_t m[PHYS_PAGE_COUNT(len)]; + struct iovec iov[1]; + struct uio uio; + int i; + + iov[0].iov_base = dst; + iov[0].iov_len = len; + uio.uio_iov = iov; + uio.uio_iovcnt = 1; + uio.uio_offset = 0; + uio.uio_resid = len; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_READ; + for (i = 0; i < PHYS_PAGE_COUNT(len); i++, src += PAGE_SIZE) + m[i] = PHYS_TO_VM_PAGE(src); + return (uiomove_fromphys(m, src & PAGE_MASK, len, &uio)); +} + +#undef PHYS_PAGE_COUNT +#endif /* __rtems__ */ + +int +uiomove(void *cp, int n, struct uio *uio) +{ + + return (uiomove_faultflag(cp, n, uio, 0)); +} + +int +uiomove_nofault(void *cp, int n, struct uio *uio) +{ + + return (uiomove_faultflag(cp, n, uio, 1)); +} + +static int +uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault) +{ +#ifndef __rtems__ + struct thread *td; +#endif /* __rtems__ */ + struct iovec *iov; + size_t cnt; + int error, newflags, save; + +#ifndef __rtems__ + td = curthread; +#endif /* __rtems__ */ + error = 0; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomove: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td, + ("uiomove proc")); + if (!nofault) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Calling uiomove()"); + +#ifndef __rtems__ + /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */ + newflags = TDP_DEADLKTREAT; + if (uio->uio_segflg == UIO_USERSPACE && nofault) { + /* + * Fail if a non-spurious page fault occurs. 
+ */ + newflags |= TDP_NOFAULTING | TDP_RESETSPUR; + } + save = curthread_pflags_set(newflags); +#else /* __rtems__ */ + (void) newflags; + (void) save; +#endif /* __rtems__ */ + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: +#ifndef __rtems__ + maybe_yield(); +#endif /* __rtems__ */ + if (uio->uio_rw == UIO_READ) + error = copyout(cp, iov->iov_base, cnt); + else + error = copyin(iov->iov_base, cp, cnt); + if (error) + goto out; + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp = (char *)cp + cnt; + n -= cnt; + } +out: +#ifndef __rtems__ + curthread_pflags_restore(save); +#endif /* __rtems__ */ + return (error); +} + +#ifndef __rtems__ +/* + * Wrapper for uiomove() that validates the arguments against a known-good + * kernel buffer. Currently, uiomove accepts a signed (n) argument, which + * is almost definitely a bad thing, so we catch that here as well. We + * return a runtime failure, but it might be desirable to generate a runtime + * assertion failure instead. + */ +int +uiomove_frombuf(void *buf, int buflen, struct uio *uio) +{ + size_t offset, n; + + if (uio->uio_offset < 0 || uio->uio_resid < 0 || + (offset = uio->uio_offset) != uio->uio_offset) + return (EINVAL); + if (buflen <= 0 || offset >= buflen) + return (0); + if ((n = buflen - offset) > IOSIZE_MAX) + return (EINVAL); + return (uiomove((char *)buf + offset, n, uio)); +} + +#ifdef ZERO_COPY_SOCKETS +/* + * Experimental support for zero-copy I/O + */ +static int +userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable) +{ + struct iovec *iov; + int error; + + iov = uio->uio_iov; + if (uio->uio_rw == UIO_READ) { + if ((so_zero_copy_receive != 0) + && ((cnt & PAGE_MASK) == 0) + && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0) + && ((uio->uio_offset & PAGE_MASK) == 0) + && ((((intptr_t) cp) & PAGE_MASK) == 0) + && (disposable != 0)) { + /* SOCKET: use page-trading */ + /* + * We only want to call vm_pgmoveco() on + * disposeable pages, since it gives the + * kernel page to the userland process. + */ + error = vm_pgmoveco(&curproc->p_vmspace->vm_map, + (vm_offset_t)cp, (vm_offset_t)iov->iov_base); + + /* + * If we get an error back, attempt + * to use copyout() instead. The + * disposable page should be freed + * automatically if we weren't able to move + * it into userland. 
+ */ + if (error != 0) + error = copyout(cp, iov->iov_base, cnt); + } else { + error = copyout(cp, iov->iov_base, cnt); + } + } else { + error = copyin(iov->iov_base, cp, cnt); + } + return (error); +} + +int +uiomoveco(void *cp, int n, struct uio *uio, int disposable) +{ + struct iovec *iov; + u_int cnt; + int error; + + KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE, + ("uiomoveco: mode")); + KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread, + ("uiomoveco proc")); + + while (n > 0 && uio->uio_resid) { + iov = uio->uio_iov; + cnt = iov->iov_len; + if (cnt == 0) { + uio->uio_iov++; + uio->uio_iovcnt--; + continue; + } + if (cnt > n) + cnt = n; + + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + maybe_yield(); + error = userspaceco(cp, cnt, uio, disposable); + if (error) + return (error); + break; + + case UIO_SYSSPACE: + if (uio->uio_rw == UIO_READ) + bcopy(cp, iov->iov_base, cnt); + else + bcopy(iov->iov_base, cp, cnt); + break; + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + cnt; + iov->iov_len -= cnt; + uio->uio_resid -= cnt; + uio->uio_offset += cnt; + cp = (char *)cp + cnt; + n -= cnt; + } + return (0); +} +#endif /* ZERO_COPY_SOCKETS */ + +/* + * Give next character to user as result of read. + */ +int +ureadc(int c, struct uio *uio) +{ + struct iovec *iov; + char *iov_base; + + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, + "Calling ureadc()"); + +again: + if (uio->uio_iovcnt == 0 || uio->uio_resid == 0) + panic("ureadc"); + iov = uio->uio_iov; + if (iov->iov_len == 0) { + uio->uio_iovcnt--; + uio->uio_iov++; + goto again; + } + switch (uio->uio_segflg) { + + case UIO_USERSPACE: + if (subyte(iov->iov_base, c) < 0) + return (EFAULT); + break; + + case UIO_SYSSPACE: + iov_base = iov->iov_base; + *iov_base = c; + iov->iov_base = iov_base; + break; + + case UIO_NOCOPY: + break; + } + iov->iov_base = (char *)iov->iov_base + 1; + iov->iov_len--; + uio->uio_resid--; + uio->uio_offset++; + return (0); +} + +int +copyinfrom(const void * __restrict src, void * __restrict dst, size_t len, + int seg) +{ + int error = 0; + + switch (seg) { + case UIO_USERSPACE: + error = copyin(src, dst, len); + break; + case UIO_SYSSPACE: + bcopy(src, dst, len); + break; + default: + panic("copyinfrom: bad seg %d\n", seg); + } + return (error); +} + +int +copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len, + size_t * __restrict copied, int seg) +{ + int error = 0; + + switch (seg) { + case UIO_USERSPACE: + error = copyinstr(src, dst, len, copied); + break; + case UIO_SYSSPACE: + error = copystr(src, dst, len, copied); + break; + default: + panic("copyinstrfrom: bad seg %d\n", seg); + } + return (error); +} +#endif /* __rtems__ */ + +int +copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error) +{ + u_int iovlen; + + *iov = NULL; + if (iovcnt > UIO_MAXIOV) + return (error); + iovlen = iovcnt * sizeof (struct iovec); + *iov = malloc(iovlen, M_IOV, M_WAITOK); + error = copyin(iovp, *iov, iovlen); + if (error) { + free(*iov, M_IOV); + *iov = NULL; + } + return (error); +} + +#ifndef __rtems__ +int +copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop) +{ + struct iovec *iov; + struct uio *uio; + u_int iovlen; + int error, i; + + *uiop = NULL; + if (iovcnt > UIO_MAXIOV) + return (EINVAL); + iovlen = iovcnt * sizeof (struct iovec); + uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); + iov = (struct iovec *)(uio + 1); + error = copyin(iovp, iov, iovlen); + if (error) { + free(uio, M_IOV); + return 
(error); + } + uio->uio_iov = iov; + uio->uio_iovcnt = iovcnt; + uio->uio_segflg = UIO_USERSPACE; + uio->uio_offset = -1; + uio->uio_resid = 0; + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > IOSIZE_MAX - uio->uio_resid) { + free(uio, M_IOV); + return (EINVAL); + } + uio->uio_resid += iov->iov_len; + iov++; + } + *uiop = uio; + return (0); +} + +struct uio * +cloneuio(struct uio *uiop) +{ + struct uio *uio; + int iovlen; + + iovlen = uiop->uio_iovcnt * sizeof (struct iovec); + uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK); + *uio = *uiop; + uio->uio_iov = (struct iovec *)(uio + 1); + bcopy(uiop->uio_iov, uio->uio_iov, iovlen); + return (uio); +} + +/* + * Map some anonymous memory in user space of size sz, rounded up to the page + * boundary. + */ +int +copyout_map(struct thread *td, vm_offset_t *addr, size_t sz) +{ + struct vmspace *vms; + int error; + vm_size_t size; + + vms = td->td_proc->p_vmspace; + + /* + * Map somewhere after heap in process memory. + */ + PROC_LOCK(td->td_proc); + *addr = round_page((vm_offset_t)vms->vm_daddr + + lim_max(td->td_proc, RLIMIT_DATA)); + PROC_UNLOCK(td->td_proc); + + /* round size up to page boundry */ + size = (vm_size_t)round_page(sz); + + error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE, + VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0); + + return (error); +} + +/* + * Unmap memory in user space. + */ +int +copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz) +{ + vm_map_t map; + vm_size_t size; + + if (sz == 0) + return (0); + + map = &td->td_proc->p_vmspace->vm_map; + size = (vm_size_t)round_page(sz); + + if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS) + return (EINVAL); + + return (0); +} +#endif /* __rtems__ */ diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index 7cc2a516..fefc94d9 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -39,12 +39,14 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_capsicum.h> #include <rtems/bsd/local/opt_compat.h> #include <rtems/bsd/local/opt_ktrace.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/sysproto.h> +#include <sys/capability.h> #include <sys/filedesc.h> #include <sys/filio.h> #include <sys/fcntl.h> @@ -78,6 +80,16 @@ __FBSDID("$FreeBSD$"); #endif /* __rtems__ */ #ifndef __rtems__ +int iosize_max_clamp = 1; +SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW, + &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX"); +/* + * Assert that the return value of read(2) and write(2) syscalls fits + * into a register. If not, an architecture will need to provide the + * usermode wrappers to reconstruct the result. 
+ */ +CTASSERT(sizeof(register_t) >= sizeof(size_t)); + static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer"); #endif /* __rtems__ */ static MALLOC_DEFINE(M_SELECT, "select", "select() buffer"); @@ -147,7 +159,7 @@ struct read_args { }; #endif int -read(td, uap) +sys_read(td, uap) struct thread *td; struct read_args *uap; { @@ -155,7 +167,7 @@ read(td, uap) struct iovec aiov; int error; - if (uap->nbyte > INT_MAX) + if (uap->nbyte > IOSIZE_MAX) return (EINVAL); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; @@ -180,7 +192,7 @@ struct pread_args { }; #endif int -pread(td, uap) +sys_pread(td, uap) struct thread *td; struct pread_args *uap; { @@ -188,7 +200,7 @@ pread(td, uap) struct iovec aiov; int error; - if (uap->nbyte > INT_MAX) + if (uap->nbyte > IOSIZE_MAX) return (EINVAL); aiov.iov_base = uap->buf; aiov.iov_len = uap->nbyte; @@ -211,7 +223,7 @@ freebsd6_pread(td, uap) oargs.buf = uap->buf; oargs.nbyte = uap->nbyte; oargs.offset = uap->offset; - return (pread(td, &oargs)); + return (sys_pread(td, &oargs)); } /* @@ -225,7 +237,7 @@ struct readv_args { }; #endif int -readv(struct thread *td, struct readv_args *uap) +sys_readv(struct thread *td, struct readv_args *uap) { struct uio *auio; int error; @@ -244,7 +256,7 @@ kern_readv(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_read(td, fd, &fp); + error = fget_read(td, fd, CAP_READ | CAP_SEEK, &fp); if (error) return (error); error = dofileread(td, fd, fp, auio, (off_t)-1, 0); @@ -264,7 +276,7 @@ struct preadv_args { }; #endif int -preadv(struct thread *td, struct preadv_args *uap) +sys_preadv(struct thread *td, struct preadv_args *uap) { struct uio *auio; int error; @@ -287,7 +299,7 @@ kern_preadv(td, fd, auio, offset) struct file *fp; int error; - error = fget_read(td, fd, &fp); + error = fget_read(td, fd, CAP_READ, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -356,7 +368,7 @@ struct write_args { }; #endif int -write(td, uap) +sys_write(td, uap) struct thread *td; struct write_args *uap; { @@ -364,7 +376,7 @@ write(td, uap) struct iovec aiov; int error; - if (uap->nbyte > INT_MAX) + if (uap->nbyte > IOSIZE_MAX) return (EINVAL); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; @@ -389,7 +401,7 @@ struct pwrite_args { }; #endif int -pwrite(td, uap) +sys_pwrite(td, uap) struct thread *td; struct pwrite_args *uap; { @@ -397,7 +409,7 @@ pwrite(td, uap) struct iovec aiov; int error; - if (uap->nbyte > INT_MAX) + if (uap->nbyte > IOSIZE_MAX) return (EINVAL); aiov.iov_base = (void *)(uintptr_t)uap->buf; aiov.iov_len = uap->nbyte; @@ -420,7 +432,7 @@ freebsd6_pwrite(td, uap) oargs.buf = uap->buf; oargs.nbyte = uap->nbyte; oargs.offset = uap->offset; - return (pwrite(td, &oargs)); + return (sys_pwrite(td, &oargs)); } /* @@ -434,7 +446,7 @@ struct writev_args { }; #endif int -writev(struct thread *td, struct writev_args *uap) +sys_writev(struct thread *td, struct writev_args *uap) { struct uio *auio; int error; @@ -453,7 +465,7 @@ kern_writev(struct thread *td, int fd, struct uio *auio) struct file *fp; int error; - error = fget_write(td, fd, &fp); + error = fget_write(td, fd, CAP_WRITE | CAP_SEEK, &fp); if (error) return (error); error = dofilewrite(td, fd, fp, auio, (off_t)-1, 0); @@ -473,7 +485,7 @@ struct pwritev_args { }; #endif int -pwritev(struct thread *td, struct pwritev_args *uap) +sys_pwritev(struct thread *td, struct pwritev_args *uap) { struct uio *auio; int error; @@ -496,7 +508,7 @@ kern_pwritev(td, fd, auio, offset) 
struct file *fp; int error; - error = fget_write(td, fd, &fp); + error = fget_write(td, fd, CAP_WRITE, &fp); if (error) return (error); if (!(fp->f_ops->fo_flags & DFLAG_SEEKABLE)) @@ -536,7 +548,8 @@ dofilewrite(td, fd, fp, auio, offset, flags) ktruio = cloneuio(auio); #endif cnt = auio->uio_resid; - if (fp->f_type == DTYPE_VNODE) + if (fp->f_type == DTYPE_VNODE && + (fp->f_vnread_flags & FDEVFS_VNODE) == 0) bwillwrite(); if ((error = fo_write(fp, auio, td->td_ucred, flags, td))) { if (auio->uio_resid != cnt && (error == ERESTART || @@ -545,7 +558,7 @@ dofilewrite(td, fd, fp, auio, offset, flags) /* Socket layer is responsible for issuing SIGPIPE. */ if (fp->f_type != DTYPE_SOCKET && error == EPIPE) { PROC_LOCK(td->td_proc); - tdksignal(td, SIGPIPE, NULL); + tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } @@ -578,7 +591,7 @@ kern_ftruncate(td, fd, length) AUDIT_ARG_FD(fd); if (length < 0) return (EINVAL); - error = fget(td, fd, &fp); + error = fget(td, fd, CAP_FTRUNCATE, &fp); if (error) return (error); AUDIT_ARG_FILE(td->td_proc, fp); @@ -599,7 +612,7 @@ struct ftruncate_args { }; #endif int -ftruncate(td, uap) +sys_ftruncate(td, uap) struct thread *td; struct ftruncate_args *uap; { @@ -633,7 +646,7 @@ struct ioctl_args { #endif /* ARGSUSED */ int -ioctl(struct thread *td, struct ioctl_args *uap) +sys_ioctl(struct thread *td, struct ioctl_args *uap) { u_long com; int arg, error; @@ -708,7 +721,7 @@ kern_ioctl(struct thread *td, int fd, u_long com, caddr_t data) AUDIT_ARG_FD(fd); AUDIT_ARG_CMD(com); - if ((error = fget(td, fd, &fp)) != 0) + if ((error = fget(td, fd, CAP_IOCTL, &fp)) != 0) return (error); if ((fp->f_flag & (FREAD | FWRITE)) == 0) { fdrop(fp, td); @@ -765,7 +778,7 @@ poll_no_poll(int events) } int -pselect(struct thread *td, struct pselect_args *uap) +sys_pselect(struct thread *td, struct pselect_args *uap) { struct timespec ts; struct timeval tv, *tvp; @@ -824,7 +837,7 @@ struct select_args { }; #endif int -select(struct thread *td, struct select_args *uap) +sys_select(struct thread *td, struct select_args *uap) { struct timeval tv, *tvp; int error; @@ -1157,6 +1170,37 @@ selsetbits(fd_mask **ibits, fd_mask **obits, int idx, fd_mask bit, int events) return (n); } +static __inline int +getselfd_cap(struct filedesc *fdp, int fd, struct file **fpp) +{ + struct file *fp; +#ifdef CAPABILITIES + struct file *fp_fromcap; + int error; +#endif + + if ((fp = fget_unlocked(fdp, fd)) == NULL) + return (EBADF); +#ifdef CAPABILITIES + /* + * If the file descriptor is for a capability, test rights and use + * the file descriptor references by the capability. + */ + error = cap_funwrap(fp, CAP_POLL_EVENT, &fp_fromcap); + if (error) { + fdrop(fp, curthread); + return (error); + } + if (fp != fp_fromcap) { + fhold(fp_fromcap); + fdrop(fp, curthread); + fp = fp_fromcap; + } +#endif /* CAPABILITIES */ + *fpp = fp; + return (0); +} + /* * Traverse the list of fds attached to this thread's seltd and check for * completion. @@ -1172,6 +1216,7 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) struct file *fp; fd_mask bit; int fd, ev, n, idx; + int error; #ifndef __rtems__ fdp = td->td_proc->p_fd; @@ -1187,8 +1232,9 @@ selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits) /* If the selinfo wasn't cleared the event didn't fire. 
*/ if (si != NULL) continue; - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + error = getselfd_cap(fdp, fd, &fp); + if (error) + return (error); idx = fd / NFDBITS; bit = (fd_mask)1 << (fd % NFDBITS); ev = fo_poll(fp, selflags(ibits, idx, bit), td->td_ucred, td); @@ -1216,6 +1262,7 @@ selscan(td, ibits, obits, nfd) fd_mask bit; int ev, flags, end, fd; int n, idx; + int error; #ifndef __rtems__ fdp = td->td_proc->p_fd; @@ -1230,8 +1277,9 @@ selscan(td, ibits, obits, nfd) flags = selflags(ibits, idx, bit); if (flags == 0) continue; - if ((fp = fget_unlocked(fdp, fd)) == NULL) - return (EBADF); + error = getselfd_cap(fdp, fd, &fp); + if (error) + return (error); selfdalloc(td, (void *)(uintptr_t)fd); ev = fo_poll(fp, flags, td->td_ucred, td); fdrop(fp, td); @@ -1251,13 +1299,11 @@ struct poll_args { int timeout; }; #endif -#ifndef __rtems__ -int -poll(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_poll(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_poll(td, uap) struct thread *td; struct poll_args *uap; { @@ -1350,7 +1396,7 @@ poll(struct pollfd fds[], nfds_t nfds, int timeout) int error; if (td != NULL) { - error = rtems_bsd_poll(td, &ua); + error = sys_poll(td, &ua); } else { error = ENOMEM; } @@ -1395,11 +1441,17 @@ pollrescan(struct thread *td) #else /* __rtems__ */ fp = fget_unlocked(fdp, fd->fd); #endif /* __rtems__ */ +#ifdef CAPABILITIES + if ((fp == NULL) + || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { +#else if (fp == NULL) { +#endif fd->revents = POLLNVAL; n++; continue; } + /* * Note: backend also returns POLLHUP and * POLLERR if appropriate. @@ -1472,7 +1524,12 @@ pollscan(td, fds, nfd) #else /* __rtems__ */ fp = fget_unlocked(fdp, fds->fd); #endif /* __rtems__ */ +#ifdef CAPABILITIES + if ((fp == NULL) + || (cap_funwrap(fp, CAP_POLL_EVENT, &fp) != 0)) { +#else if (fp == NULL) { +#endif fds->revents = POLLNVAL; n++; } else { @@ -1514,11 +1571,11 @@ struct openbsd_poll_args { }; #endif int -openbsd_poll(td, uap) +sys_openbsd_poll(td, uap) register struct thread *td; register struct openbsd_poll_args *uap; { - return (poll(td, (struct poll_args *)uap)); + return (sys_poll(td, (struct poll_args *)uap)); } /* diff --git a/freebsd/sys/kern/sys_socket.c b/freebsd/sys/kern/sys_socket.c index 0c1efdf0..08777991 100644 --- a/freebsd/sys/kern/sys_socket.c +++ b/freebsd/sys/kern/sys_socket.c @@ -67,6 +67,8 @@ struct fileops socketops = { .fo_kqfilter = soo_kqfilter, .fo_stat = soo_stat, .fo_close = soo_close, + .fo_chmod = invfo_chmod, + .fo_chown = invfo_chown, .fo_flags = DFLAG_PASSABLE }; #endif /* __rtems__ */ @@ -145,7 +147,7 @@ soo_write(struct file *fp, struct uio *uio, struct ucred *active_cred, if (error == EPIPE && (so->so_options & SO_NOSIGPIPE) == 0) { #ifndef __rtems__ PROC_LOCK(uio->uio_td->td_proc); - tdksignal(uio->uio_td, SIGPIPE, NULL); + tdsignal(uio->uio_td, SIGPIPE); PROC_UNLOCK(uio->uio_td->td_proc); #else /* __rtems__ */ /* FIXME: Determine if we really want to use signals */ diff --git a/freebsd/sys/kern/uipc_domain.c b/freebsd/sys/kern/uipc_domain.c index 450c1d8c..3af4448c 100644 --- a/freebsd/sys/kern/uipc_domain.c +++ b/freebsd/sys/kern/uipc_domain.c @@ -241,28 +241,11 @@ domain_add(void *data) mtx_unlock(&dom_mtx); } -static void -socket_zone_change(void *tag) -{ - - uma_zone_set_max(socket_zone, maxsockets); -} - /* ARGSUSED*/ static void domaininit(void *dummy) { - /* - * Before we do any setup, make sure to initialize the - * zone allocator we get struct sockets from. 
- */ - socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); - uma_zone_set_max(socket_zone, maxsockets); - EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, - EVENTHANDLER_PRI_FIRST); - if (max_linkhdr < 16) /* XXX */ max_linkhdr = 16; diff --git a/freebsd/sys/kern/uipc_mbuf.c b/freebsd/sys/kern/uipc_mbuf.c index 3b0a5fe9..abf4dd3e 100644 --- a/freebsd/sys/kern/uipc_mbuf.c +++ b/freebsd/sys/kern/uipc_mbuf.c @@ -913,8 +913,8 @@ m_cat(struct mbuf *m, struct mbuf *n) while (m->m_next) m = m->m_next; while (n) { - if (m->m_flags & M_EXT || - m->m_data + m->m_len + n->m_len >= &m->m_dat[MLEN]) { + if (!M_WRITABLE(m) || + M_TRAILINGSPACE(m) < n->m_len) { /* just join the two chains */ m->m_next = n; return; @@ -1586,7 +1586,7 @@ again: n = m->m_next; if (n == NULL) break; - if ((m->m_flags & M_RDONLY) == 0 && + if (M_WRITABLE(m) && n->m_len < M_TRAILINGSPACE(m)) { bcopy(mtod(n, void *), mtod(m, char *) + m->m_len, n->m_len); @@ -1728,7 +1728,8 @@ struct mbuf * m_uiotombuf(struct uio *uio, int how, int len, int align, int flags) { struct mbuf *m, *mb; - int error, length, total; + int error, length; + ssize_t total; int progress = 0; /* @@ -2035,7 +2036,7 @@ mbprof_textify(void) { int offset; char *c; - u_int64_t *p; + uint64_t *p; p = &mbprof.wasted[0]; diff --git a/freebsd/sys/kern/uipc_mbuf2.c b/freebsd/sys/kern/uipc_mbuf2.c index a2e4e395..a44ddc27 100644 --- a/freebsd/sys/kern/uipc_mbuf2.c +++ b/freebsd/sys/kern/uipc_mbuf2.c @@ -316,7 +316,7 @@ m_tag_free_default(struct m_tag *t) /* Get a packet tag structure along with specified data following. */ struct m_tag * -m_tag_alloc(u_int32_t cookie, int type, int len, int wait) +m_tag_alloc(uint32_t cookie, int type, int len, int wait) { struct m_tag *t; @@ -378,7 +378,7 @@ m_tag_delete_nonpersistent(struct mbuf *m) /* Find a tag, starting from a given position. */ struct m_tag * -m_tag_locate(struct mbuf *m, u_int32_t cookie, int type, struct m_tag *t) +m_tag_locate(struct mbuf *m, uint32_t cookie, int type, struct m_tag *t) { struct m_tag *p; diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index b89abc84..2a0e527d 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -63,7 +63,7 @@ void (*aio_swake)(struct socket *, struct sockbuf *); u_long sb_max = SB_MAX; u_long sb_max_adj = - SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ + (quad_t)SB_MAX * MCLBYTES / (MSIZE + MCLBYTES); /* adjusted sb_max */ static u_long sb_efficiency = 8; /* parameter for sbreserve() */ @@ -945,6 +945,13 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_int *moff) /* Return closest mbuf in chain for current offset. */ *moff = off - sb->sb_sndptroff; m = ret = sb->sb_sndptr ? sb->sb_sndptr : sb->sb_mb; + if (*moff == m->m_len) { + *moff = 0; + sb->sb_sndptroff += m->m_len; + m = ret = m->m_next; + KASSERT(ret->m_len > 0, + ("mbuf %p in sockbuf %p chain has no valid data", ret, sb)); + } /* Advance by len to be as close as possible for the next transmit. 
*/ for (off = off - sb->sb_sndptroff + len - 1; diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 227e6dac..9ca2c14c 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -137,6 +137,8 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/uio.h> #include <sys/jail.h> +#include <sys/syslog.h> +#include <netinet/in.h> #include <net/vnet.h> @@ -159,18 +161,24 @@ static void filt_sowdetach(struct knote *kn); static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); -static struct filterops solisten_filtops = - { 1, NULL, filt_sordetach, filt_solisten }; -static struct filterops soread_filtops = - { 1, NULL, filt_sordetach, filt_soread }; -static struct filterops sowrite_filtops = - { 1, NULL, filt_sowdetach, filt_sowrite }; +static struct filterops solisten_filtops = { + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_solisten, +}; +static struct filterops soread_filtops = { + .f_isfd = 1, + .f_detach = filt_sordetach, + .f_event = filt_soread, +}; +static struct filterops sowrite_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_sowrite, +}; -uma_zone_t socket_zone; so_gen_t so_gencnt; /* generation count for sockets */ -int maxsockets; - MALLOC_DEFINE(M_SONAME, "soname", "socket name"); MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); @@ -178,15 +186,37 @@ MALLOC_DEFINE(M_PCB, "pcb", "protocol control block"); VNET_ASSERT(curvnet != NULL, \ ("%s:%d curvnet is NULL, so=%p", __func__, __LINE__, (so))); +/* + * Limit on the number of connections in the listen queue waiting + * for accept(2). + */ static int somaxconn = SOMAXCONN; -static int sysctl_somaxconn(SYSCTL_HANDLER_ARGS); -/* XXX: we dont have SYSCTL_USHORT */ + +static int +sysctl_somaxconn(SYSCTL_HANDLER_ARGS) +{ + int error; + int val; + + val = somaxconn; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error || !req->newptr ) + return (error); + + if (val < 1 || val > USHRT_MAX) + return (EINVAL); + + somaxconn = val; + return (0); +} SYSCTL_PROC(_kern_ipc, KIPC_SOMAXCONN, somaxconn, CTLTYPE_UINT | CTLFLAG_RW, - 0, sizeof(int), sysctl_somaxconn, "I", "Maximum pending socket connection " - "queue size"); + 0, sizeof(int), sysctl_somaxconn, "I", + "Maximum listen socket pending connection accept queue size"); + static int numopensockets; SYSCTL_INT(_kern_ipc, OID_AUTO, numopensockets, CTLFLAG_RD, &numopensockets, 0, "Number of open sockets"); + #ifdef ZERO_COPY_SOCKETS /* These aren't static because they're used in other files. */ int so_zero_copy_send = 1; @@ -220,6 +250,49 @@ MTX_SYSINIT(so_global_mtx, &so_global_mtx, "so_glabel", MTX_DEF); SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); /* + * Initialize the socket subsystem and set up the socket + * memory allocator. + */ +uma_zone_t socket_zone; +int maxsockets; + +static void +socket_zone_change(void *tag) +{ + + uma_zone_set_max(socket_zone, maxsockets); +} + +static void +socket_init(void *tag) +{ + + socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, + NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); + uma_zone_set_max(socket_zone, maxsockets); + EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, + EVENTHANDLER_PRI_FIRST); +} +SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); + +/* + * Initialise maxsockets. This SYSINIT must be run after + * tunable_mbinit(). 
+ */ +static void +init_maxsockets(void *ignored) +{ + + TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); +#ifndef __rtems__ + maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); +#else /* __rtems__ */ + maxsockets = imax(maxsockets, nmbclusters); +#endif /* __rtems__ */ +} +SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); + +/* * Sysctl to get and set the maximum global sockets limit. Notify protocols * of the change so that they can update their dependent limits as required. */ @@ -245,29 +318,11 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS) } return (error); } - SYSCTL_PROC(_kern_ipc, OID_AUTO, maxsockets, CTLTYPE_INT|CTLFLAG_RW, &maxsockets, 0, sysctl_maxsockets, "IU", "Maximum number of sockets avaliable"); /* - * Initialise maxsockets. This SYSINIT must be run after - * tunable_mbinit(). - */ -static void -init_maxsockets(void *ignored) -{ - - TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); -#ifndef __rtems__ - maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); -#else /* __rtems__ */ - maxsockets = imax(maxsockets, nmbclusters); -#endif /* __rtems__ */ -} -SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); - -/* * Socket operation routines. These routines are called by the routines in * sys_socket.c or from a system process, and implement the semantics of * socket operations by switching out to the protocol specific routines. @@ -450,16 +505,24 @@ sonewconn(struct socket *head, int connstatus) over = (head->so_qlen > 3 * head->so_qlimit / 2); ACCEPT_UNLOCK(); #ifdef REGRESSION - if (regression_sonewconn_earlytest && over) + if (regression_sonewconn_earlytest && over) { #else - if (over) + if (over) { #endif + log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " + "%i already in queue awaiting acceptance\n", + __func__, head->so_pcb, head->so_qlen); return (NULL); + } VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", __func__, __LINE__, head)); so = soalloc(head->so_vnet); - if (so == NULL) + if (so == NULL) { + log(LOG_DEBUG, "%s: pcb %p: New socket allocation failure: " + "limit reached or out of memory\n", + __func__, head->so_pcb); return (NULL); + } if ((head->so_options & SO_ACCEPTFILTER) != 0) connstatus = 0; so->so_head = head; @@ -476,9 +539,16 @@ sonewconn(struct socket *head, int connstatus) knlist_init_mtx(&so->so_rcv.sb_sel.si_note, SOCKBUF_MTX(&so->so_rcv)); knlist_init_mtx(&so->so_snd.sb_sel.si_note, SOCKBUF_MTX(&so->so_snd)); VNET_SO_ASSERT(head); - if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat) || - (*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { + if (soreserve(so, head->so_snd.sb_hiwat, head->so_rcv.sb_hiwat)) { sodealloc(so); + log(LOG_DEBUG, "%s: pcb %p: soreserve() failed\n", + __func__, head->so_pcb); + return (NULL); + } + if ((*so->so_proto->pr_usrreqs->pru_attach)(so, 0, NULL)) { + sodealloc(so); + log(LOG_DEBUG, "%s: pcb %p: pru_attach() failed\n", + __func__, head->so_pcb); return (NULL); } so->so_rcv.sb_lowat = head->so_rcv.sb_lowat; @@ -489,6 +559,20 @@ sonewconn(struct socket *head, int connstatus) so->so_snd.sb_flags |= head->so_snd.sb_flags & SB_AUTOSIZE; so->so_state |= connstatus; ACCEPT_LOCK(); + /* + * The accept socket may be tearing down but we just + * won a race on the ACCEPT_LOCK. + * However, if sctp_peeloff() is called on a 1-to-many + * style socket, the SO_ACCEPTCONN doesn't need to be set. 
+ */ + if (!(head->so_options & SO_ACCEPTCONN) && + ((head->so_proto->pr_protocol != IPPROTO_SCTP) || + (head->so_type != SOCK_SEQPACKET))) { + SOCK_LOCK(so); + so->so_head = NULL; + sofree(so); /* NB: returns ACCEPT_UNLOCK'ed. */ + return (NULL); + } if (connstatus) { TAILQ_INSERT_TAIL(&head->so_comp, so, so_list); so->so_qstate |= SQ_COMP; @@ -583,9 +667,12 @@ solisten_proto(struct socket *so, int backlog) } /* - * Attempt to free a socket. This should really be sotryfree(). + * Evaluate the reference count and named references on a socket; if no + * references remain, free it. This should be called whenever a reference is + * released, such as in sorele(), but also when named reference flags are + * cleared in socket or protocol code. * - * sofree() will succeed if: + * sofree() will free the socket if: * * - There are no outstanding file descriptor references or related consumers * (so_count == 0). @@ -598,9 +685,6 @@ solisten_proto(struct socket *so, int backlog) * - The socket is not in a completed connection queue, so a process has been * notified that it is present. If it is removed, the user process may * block in accept() despite select() saying the socket was ready. - * - * Otherwise, it will quietly abort so that a future call to sofree(), when - * conditions are right, can succeed. */ void sofree(struct socket *so) @@ -714,9 +798,14 @@ soclose(struct socket *so) drop: if (so->so_proto->pr_usrreqs->pru_close != NULL) (*so->so_proto->pr_usrreqs->pru_close)(so); + ACCEPT_LOCK(); if (so->so_options & SO_ACCEPTCONN) { struct socket *sp; - ACCEPT_LOCK(); + /* + * Prevent new additions to the accept queues due + * to ACCEPT_LOCK races while we are draining them. + */ + so->so_options &= ~SO_ACCEPTCONN; while ((sp = TAILQ_FIRST(&so->so_incomp)) != NULL) { TAILQ_REMOVE(&so->so_incomp, sp, so_list); so->so_incqlen--; @@ -735,13 +824,15 @@ drop: soabort(sp); ACCEPT_LOCK(); } - ACCEPT_UNLOCK(); + KASSERT((TAILQ_EMPTY(&so->so_comp)), + ("%s: so_comp populated", __func__)); + KASSERT((TAILQ_EMPTY(&so->so_incomp)), + ("%s: so_incomp populated", __func__)); } - ACCEPT_LOCK(); SOCK_LOCK(so); KASSERT((so->so_state & SS_NOFDREF) == 0, ("soclose: NOFDREF")); so->so_state |= SS_NOFDREF; - sorele(so); + sorele(so); /* NB: Returns with ACCEPT_UNLOCK(). */ CURVNET_RESTORE(); return (error); } @@ -863,12 +954,6 @@ struct so_zerocopy_stats{ int found_ifp; }; struct so_zerocopy_stats so_zerocp_stats = {0,0,0}; -#include <netinet/in.h> -#include <net/route.h> -#include <netinet/in_pcb.h> -#include <vm/vm.h> -#include <vm/vm_page.h> -#include <vm/vm_object.h> /* * sosend_copyin() is only used if zero copy sockets are enabled. 
Otherwise @@ -890,11 +975,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, int flags) { struct mbuf *m, **mp, *top; - long len, resid; + long len; + ssize_t resid; int error; -#ifdef ZERO_COPY_SOCKETS int cow_send; -#endif *retmp = top = NULL; mp = ⊤ @@ -902,11 +986,8 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, resid = uio->uio_resid; error = 0; do { -#ifdef ZERO_COPY_SOCKETS cow_send = 0; -#endif /* ZERO_COPY_SOCKETS */ if (resid >= MINCLSIZE) { -#ifdef ZERO_COPY_SOCKETS if (top == NULL) { m = m_gethdr(M_WAITOK, MT_DATA); m->m_pkthdr.len = 0; @@ -914,9 +995,9 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, } else m = m_get(M_WAITOK, MT_DATA); if (so_zero_copy_send && - resid>=PAGE_SIZE && - *space>=PAGE_SIZE && - uio->uio_iov->iov_len>=PAGE_SIZE) { + resid >= PAGE_SIZE && + *space >= PAGE_SIZE && + uio->uio_iov->iov_len >= PAGE_SIZE) { so_zerocp_stats.size_ok++; so_zerocp_stats.align_ok++; cow_send = socow_setup(m, uio); @@ -926,15 +1007,6 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, m_clget(m, M_WAITOK); len = min(min(MCLBYTES, resid), *space); } -#else /* ZERO_COPY_SOCKETS */ - if (top == NULL) { - m = m_getcl(M_WAIT, MT_DATA, M_PKTHDR); - m->m_pkthdr.len = 0; - m->m_pkthdr.rcvif = NULL; - } else - m = m_getcl(M_WAIT, MT_DATA, 0); - len = min(min(MCLBYTES, resid), *space); -#endif /* ZERO_COPY_SOCKETS */ } else { if (top == NULL) { m = m_gethdr(M_WAIT, MT_DATA); @@ -959,12 +1031,10 @@ sosend_copyin(struct uio *uio, struct mbuf **retmp, int atomic, long *space, } *space -= len; -#ifdef ZERO_COPY_SOCKETS if (cow_send) error = 0; else -#endif /* ZERO_COPY_SOCKETS */ - error = uiomove(mtod(m, void *), (int)len, uio); + error = uiomove(mtod(m, void *), (int)len, uio); resid = uio->uio_resid; m->m_len = len; *mp = m; @@ -982,7 +1052,7 @@ out: *retmp = top; return (error); } -#endif /*ZERO_COPY_SOCKETS*/ +#endif /* ZERO_COPY_SOCKETS */ #define SBLOCKWAIT(f) (((f) & MSG_DONTWAIT) ? 0 : SBL_WAIT) @@ -990,7 +1060,8 @@ int sosend_dgram(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - long space, resid; + long space; + ssize_t resid; int clen = 0, error, dontroute; #ifdef ZERO_COPY_SOCKETS int atomic = sosendallatonce(so) || top; @@ -1164,7 +1235,8 @@ int sosend_generic(struct socket *so, struct sockaddr *addr, struct uio *uio, struct mbuf *top, struct mbuf *control, int flags, struct thread *td) { - long space, resid; + long space; + ssize_t resid; int clen = 0, error, dontroute; int atomic = sosendallatonce(so) || top; @@ -1463,11 +1535,12 @@ soreceive_generic(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, **mp; - int flags, len, error, offset; + int flags, error, offset; + ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; int moff, type = 0; - int orig_resid = uio->uio_resid; + ssize_t orig_resid = uio->uio_resid; mp = mp0; if (psa != NULL) @@ -1920,6 +1993,7 @@ release: /* * Optimized version of soreceive() for stream (TCP) sockets. + * XXXAO: (MSG_WAITALL | MSG_PEEK) isn't properly handled. */ int soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, @@ -1960,20 +2034,9 @@ soreceive_stream(struct socket *so, struct sockaddr **psa, struct uio *uio, } oresid = uio->uio_resid; - /* We will never ever get anything unless we are connected. 
*/ + /* We will never ever get anything unless we are or were connected. */ if (!(so->so_state & (SS_ISCONNECTED|SS_ISDISCONNECTED))) { - /* When disconnecting there may be still some data left. */ - if (sb->sb_cc > 0) - goto deliver; - if (!(so->so_state & SS_ISDISCONNECTED)) - error = ENOTCONN; - goto out; - } - - /* Socket buffer is empty and we shall not block. */ - if (sb->sb_cc == 0 && - ((sb->sb_flags & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { - error = EAGAIN; + error = ENOTCONN; goto out; } @@ -2000,6 +2063,13 @@ restart: goto out; } + /* Socket buffer is empty and we shall not block. */ + if (sb->sb_cc == 0 && + ((so->so_state & SS_NBIO) || (flags & (MSG_DONTWAIT|MSG_NBIO)))) { + error = EAGAIN; + goto out; + } + /* Socket buffer got some data that we shall deliver now. */ if (sb->sb_cc > 0 && !(flags & MSG_WAITALL) && ((sb->sb_flags & SS_NBIO) || @@ -2012,7 +2082,7 @@ restart: /* On MSG_WAITALL we must wait until all data or error arrives. */ if ((flags & MSG_WAITALL) && - (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_lowat)) + (sb->sb_cc >= uio->uio_resid || sb->sb_cc >= sb->sb_hiwat)) goto deliver; /* @@ -2040,7 +2110,11 @@ deliver: if (mp0 != NULL) { /* Dequeue as many mbufs as possible. */ if (!(flags & MSG_PEEK) && len >= sb->sb_mb->m_len) { - for (*mp0 = m = sb->sb_mb; + if (*mp0 == NULL) + *mp0 = sb->sb_mb; + else + m_cat(*mp0, sb->sb_mb); + for (m = sb->sb_mb; m != NULL && m->m_len <= len; m = m->m_next) { len -= m->m_len; @@ -2048,10 +2122,11 @@ deliver: sbfree(sb, m); n = m; } + n->m_next = NULL; sb->sb_mb = m; + sb->sb_lastrecord = sb->sb_mb; if (sb->sb_mb == NULL) SB_EMPTY_FIXUP(sb); - n->m_next = NULL; } /* Copy the remainder. */ if (len > 0) { @@ -2062,9 +2137,9 @@ deliver: if (m == NULL) len = 0; /* Don't flush data from sockbuf. 
*/ else - uio->uio_resid -= m->m_len; + uio->uio_resid -= len; if (*mp0 != NULL) - n->m_next = m; + m_cat(*mp0, m); else *mp0 = m; if (*mp0 == NULL) { @@ -2128,7 +2203,8 @@ soreceive_dgram(struct socket *so, struct sockaddr **psa, struct uio *uio, struct mbuf **mp0, struct mbuf **controlp, int *flagsp) { struct mbuf *m, *m2; - int flags, len, error; + int flags, error; + ssize_t len; struct protosw *pr = so->so_proto; struct mbuf *nextrecord; @@ -2334,9 +2410,11 @@ soshutdown(struct socket *so, int how) sorflush(so); if (how != SHUT_RD) { error = (*pr->pr_usrreqs->pru_shutdown)(so); + wakeup(&so->so_timeo); CURVNET_RESTORE(); return (error); } + wakeup(&so->so_timeo); CURVNET_RESTORE(); return (0); } @@ -2445,6 +2523,7 @@ sosetopt(struct socket *so, struct sockopt *sopt) struct linger l; struct timeval tv; u_long val; + uint32_t val32; #ifdef MAC struct mac extmac; #endif @@ -2509,21 +2588,29 @@ sosetopt(struct socket *so, struct sockopt *sopt) case SO_SETFIB: error = sooptcopyin(sopt, &optval, sizeof optval, sizeof optval); + if (error) + goto bad; + if (optval < 0 || optval >= rt_numfibs) { error = EINVAL; goto bad; } if (((so->so_proto->pr_domain->dom_family == PF_INET) || (so->so_proto->pr_domain->dom_family == PF_INET6) || - (so->so_proto->pr_domain->dom_family == PF_ROUTE))) { + (so->so_proto->pr_domain->dom_family == PF_ROUTE))) so->so_fibnum = optval; - /* Note: ignore error */ - if (so->so_proto->pr_ctloutput) - (*so->so_proto->pr_ctloutput)(so, sopt); - } else { + else so->so_fibnum = 0; - } break; + + case SO_USER_COOKIE: + error = sooptcopyin(sopt, &val32, sizeof val32, + sizeof val32); + if (error) + goto bad; + so->so_user_cookie = val32; + break; + case SO_SNDBUF: case SO_RCVBUF: case SO_SNDLOWAT: @@ -3289,24 +3376,6 @@ socheckuid(struct socket *so, uid_t uid) } #endif /* __rtems__ */ -static int -sysctl_somaxconn(SYSCTL_HANDLER_ARGS) -{ - int error; - int val; - - val = somaxconn; - error = sysctl_handle_int(oidp, &val, 0, req); - if (error || !req->newptr ) - return (error); - - if (val < 1 || val > USHRT_MAX) - return (EINVAL); - - somaxconn = val; - return (0); -} - /* * These functions are used by protocols to notify the socket layer (and its * consumers) of state changes in the sockets driven by protocol-side events. 
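For reference, the SO_USER_COOKIE handling added to sosetopt() above stores an application-supplied 32-bit tag in so_user_cookie via sooptcopyin(). A minimal user-space sketch of setting it (assuming SO_USER_COOKIE is exposed by <sys/socket.h>, as on FreeBSD 9.x; the helper name is illustrative only):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <stdint.h>

    /* Tag a socket with an application-defined 32-bit cookie. */
    static int
    set_user_cookie(int s, uint32_t cookie)
    {

        return (setsockopt(s, SOL_SOCKET, SO_USER_COOKIE,
            &cookie, sizeof(cookie)));
    }

The kernel side keeps the value on the socket so that in-kernel consumers can later inspect so_user_cookie; a copyin failure or short option buffer is reported by sooptcopyin() before the assignment is made.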
diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index c62cc9b5..738b5c3c 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -37,6 +37,7 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include <rtems/bsd/local/opt_capsicum.h> #include <rtems/bsd/local/opt_inet.h> #include <rtems/bsd/local/opt_inet6.h> #include <rtems/bsd/local/opt_sctp.h> @@ -45,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <rtems/bsd/sys/param.h> #include <sys/systm.h> +#include <sys/capability.h> #include <sys/kernel.h> #include <rtems/bsd/sys/lock.h> #include <sys/mutex.h> @@ -82,6 +84,7 @@ __FBSDID("$FreeBSD$"); #include <security/mac/mac_framework.h> #include <vm/vm.h> +#include <vm/vm_param.h> #include <vm/vm_object.h> #include <vm/vm_page.h> #include <vm/vm_pageout.h> @@ -96,6 +99,16 @@ __FBSDID("$FreeBSD$"); #endif /* INET || INET6 */ #ifdef __rtems__ #include <machine/rtems-bsd-syscall-api.h> + +static int kern_bind(struct thread *, int, struct sockaddr *); + +static int kern_connect(struct thread *, int, struct sockaddr *); + +static int kern_setsockopt( struct thread *td, int s, int level, int name, + void *val, enum uio_seg valseg, socklen_t valsize); + +static int kern_getsockopt( struct thread *td, int s, int level, int name, + void *val, enum uio_seg valseg, socklen_t *valsize); #endif /* __rtems__ */ static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); @@ -124,33 +137,47 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0, "Number of sendfile(2) sf_bufs in use"); /* - * Convert a user file descriptor to a kernel file entry. A reference on the - * file entry is held upon returning. This is lighter weight than - * fgetsock(), which bumps the socket reference drops the file reference - * count instead, as this approach avoids several additional mutex operations - * associated with the additional reference count. If requested, return the - * open file flags. + * Convert a user file descriptor to a kernel file entry and check that, if + * it is a capability, the right rights are present. A reference on the file + * entry is held upon returning. */ static int -getsock(struct filedesc *fdp, int fd, struct file **fpp, u_int *fflagp) +getsock_cap(struct filedesc *fdp, int fd, cap_rights_t rights, + struct file **fpp, u_int *fflagp) { struct file *fp; +#ifdef CAPABILITIES + struct file *fp_fromcap; int error; +#endif fp = NULL; - if (fdp == NULL || (fp = fget_unlocked(fdp, fd)) == NULL) { - error = EBADF; - } else if (fp->f_type != DTYPE_SOCKET) { + if ((fdp == NULL) || ((fp = fget_unlocked(fdp, fd)) == NULL)) + return (EBADF); +#ifdef CAPABILITIES + /* + * If the file descriptor is for a capability, test rights and use + * the file descriptor referenced by the capability. 
+ */ + error = cap_funwrap(fp, rights, &fp_fromcap); + if (error) { fdrop(fp, curthread); - fp = NULL; - error = ENOTSOCK; - } else { - if (fflagp != NULL) - *fflagp = fp->f_flag; - error = 0; + return (error); } + if (fp != fp_fromcap) { + fhold(fp_fromcap); + fdrop(fp, curthread); + fp = fp_fromcap; + } +#endif /* CAPABILITIES */ + if (fp->f_type != DTYPE_SOCKET) { + fdrop(fp, curthread); + return (ENOTSOCK); + } + if (fflagp != NULL) + *fflagp = fp->f_flag; *fpp = fp; - return (error); + return (0); } #else /* __rtems__ */ static int @@ -185,7 +212,7 @@ rtems_bsd_getsock(int fd, struct file **fpp, u_int *fflagp) return (error); } -#define getsock(fdp, fd, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp) +#define getsock_cap(fdp, fd, rights, fpp, fflagp) rtems_bsd_getsock(fd, fpp, fflagp) #endif /* __rtems__ */ /* @@ -195,13 +222,11 @@ rtems_bsd_getsock(int fd, struct file **fpp, u_int *fflagp) #define COMPAT_OLDSOCK #endif -#ifndef __rtems__ -int -socket(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_socket(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_socket(td, uap) struct thread *td; struct socket_args /* { int domain; @@ -226,7 +251,7 @@ rtems_bsd_socket(td, uap) #ifndef __rtems__ fdp = td->td_proc->p_fd; #endif /* __rtems__ */ - error = falloc(td, &fp, &fd); + error = falloc(td, &fp, &fd, 0); if (error) return (error); /* An extra reference on `fp' has been held for us by falloc(). */ @@ -254,7 +279,7 @@ socket(int domain, int type, int protocol) int error; if (td != NULL) { - error = rtems_bsd_socket(td, &ua); + error = sys_socket(td, &ua); } else { error = ENOMEM; } @@ -267,16 +292,12 @@ socket(int domain, int type, int protocol) } #endif /* __rtems__ */ -#ifndef __rtems__ /* ARGSUSED */ -int -bind(td, uap) -#else /* __rtems__ */ -static int kern_bind(struct thread *, int, struct sockaddr *); - -static int -rtems_bsd_bind(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_bind(td, uap) struct thread *td; struct bind_args /* { int s; @@ -307,7 +328,7 @@ bind(int socket, const struct sockaddr *address, socklen_t address_len) int error; if (td != NULL) { - error = rtems_bsd_bind(td, &ua); + error = sys_bind(td, &ua); } else { error = ENOMEM; } @@ -327,7 +348,7 @@ kern_bind(td, fd, sa) int error; AUDIT_ARG_FD(fd); - error = getsock(td->td_proc->p_fd, fd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, fd, CAP_BIND, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -345,13 +366,11 @@ kern_bind(td, fd, sa) } /* ARGSUSED */ -#ifndef __rtems__ -int -listen(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_listen(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_listen(td, uap) struct thread *td; struct listen_args /* { int s; @@ -363,7 +382,7 @@ rtems_bsd_listen(td, uap) int error; AUDIT_ARG_FD(uap->s); - error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_LISTEN, &fp, NULL); if (error == 0) { so = fp->f_data; #ifdef MAC @@ -387,7 +406,7 @@ listen(int socket, int backlog) int error; if (td != NULL) { - error = rtems_bsd_listen(td, &ua); + error = sys_listen(td, &ua); } else { error = ENOMEM; } @@ -508,7 +527,7 @@ kern_accept(struct thread *td, int s, struct sockaddr **name, #ifndef __rtems__ fdp = td->td_proc->p_fd; #endif /* __rtems__ */ - error = getsock(fdp, s, &headfp, &fflag); + error = getsock_cap(fdp, s, CAP_ACCEPT, &headfp, &fflag); if (error) return (error); head = headfp->f_data; @@ -521,7 +540,7 @@ kern_accept(struct thread *td, int s, 
struct sockaddr **name, if (error != 0) goto done; #endif - error = falloc(td, &nfp, &fd); + error = falloc(td, &nfp, &fd, 0); if (error) goto done; ACCEPT_LOCK(); @@ -643,7 +662,7 @@ done: #ifndef __rtems__ int -accept(td, uap) +sys_accept(td, uap) struct thread *td; struct accept_args *uap; { @@ -664,15 +683,11 @@ oaccept(td, uap) #endif /* __rtems__ */ /* ARGSUSED */ -#ifndef __rtems__ -int -connect(td, uap) -#else /* __rtems__ */ -static int kern_connect(struct thread *, int, struct sockaddr *); - -static int -rtems_bsd_connect(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_connect(td, uap) struct thread *td; struct connect_args /* { int s; @@ -704,7 +719,7 @@ connect(int socket, const struct sockaddr *address, socklen_t address_len) int error; if (td != NULL) { - error = rtems_bsd_connect(td, &ua); + error = sys_connect(td, &ua); } else { error = ENOMEM; } @@ -726,7 +741,7 @@ kern_connect(td, fd, sa) int interrupted = 0; AUDIT_ARG_FD(fd); - error = getsock(td->td_proc->p_fd, fd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, fd, CAP_CONNECT, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -800,12 +815,12 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, if (error) goto free1; /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ - error = falloc(td, &fp1, &fd); + error = falloc(td, &fp1, &fd, 0); if (error) goto free2; rsv[0] = fd; fp1->f_data = so1; /* so1 already has ref count */ - error = falloc(td, &fp2, &fd); + error = falloc(td, &fp2, &fd, 0); if (error) goto free3; fp2->f_data = so2; /* so2 already has ref count */ @@ -842,7 +857,7 @@ free1: } int -socketpair(struct thread *td, struct socketpair_args *uap) +sys_socketpair(struct thread *td, struct socketpair_args *uap) { int error, sv[2]; @@ -875,6 +890,11 @@ sendit(td, s, mp, flags) struct sockaddr *to; int error; +#ifdef CAPABILITY_MODE + if (IN_CAPABILITY_MODE(td) && (mp->msg_name != NULL)) + return (ECAPMODE); +#endif + if (mp->msg_name != NULL) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); if (error) { @@ -935,14 +955,18 @@ kern_sendit(td, s, mp, flags, control, segflg) struct uio auio; struct iovec *iov; struct socket *so; - int i; - int len, error; + int i, error; + ssize_t len; + cap_rights_t rights; #ifdef KTRACE struct uio *ktruio = NULL; #endif AUDIT_ARG_FD(s); - error = getsock(td->td_proc->p_fd, s, &fp, NULL); + rights = CAP_WRITE; + if (mp->msg_name != NULL) + rights |= CAP_CONNECT; + error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL); if (error) return (error); so = (struct socket *)fp->f_data; @@ -992,7 +1016,7 @@ kern_sendit(td, s, mp, flags, control, segflg) !(flags & MSG_NOSIGNAL)) { #ifndef __rtems__ PROC_LOCK(td->td_proc); - tdksignal(td, SIGPIPE, NULL); + tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); #else /* __rtems__ */ /* FIXME: Determine if we really want to use signals */ @@ -1012,13 +1036,11 @@ bad: return (error); } -#ifndef __rtems__ -int -sendto(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_sendto(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_sendto(td, uap) struct thread *td; struct sendto_args /* { int s; @@ -1063,7 +1085,7 @@ sendto(int socket, const void *message, size_t length, int flags, int error; if (td != NULL) { - error = rtems_bsd_sendto(td, &ua); + error = sys_sendto(td, &ua); } else { error = ENOMEM; } @@ -1132,13 +1154,11 @@ osendmsg(td, uap) #endif #endif /* __rtems__ */ -#ifndef __rtems__ -int -sendmsg(td, uap) -#else /* __rtems__ */ -static int 
-rtems_bsd_sendmsg(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_sendmsg(td, uap) struct thread *td; struct sendmsg_args /* { int s; @@ -1177,7 +1197,7 @@ sendmsg(int socket, const struct msghdr *message, int flags) int error; if (td != NULL) { - error = rtems_bsd_sendmsg(td, &ua); + error = sys_sendmsg(td, &ua); } else { error = ENOMEM; } @@ -1204,7 +1224,7 @@ kern_recvit(td, s, mp, fromseg, controlp) struct uio auio; struct iovec *iov; int i; - socklen_t len; + ssize_t len; int error; struct mbuf *m, *control = 0; caddr_t ctlbuf; @@ -1215,11 +1235,11 @@ kern_recvit(td, s, mp, fromseg, controlp) struct uio *ktruio = NULL; #endif - if(controlp != NULL) - *controlp = 0; + if (controlp != NULL) + *controlp = NULL; AUDIT_ARG_FD(s); - error = getsock(td->td_proc->p_fd, s, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_READ, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1255,19 +1275,19 @@ kern_recvit(td, s, mp, fromseg, controlp) (mp->msg_control || controlp) ? &control : (struct mbuf **)0, &mp->msg_flags); if (error) { - if (auio.uio_resid != (int)len && (error == ERESTART || + if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } #ifdef KTRACE if (ktruio != NULL) { - ktruio->uio_resid = (int)len - auio.uio_resid; + ktruio->uio_resid = len - auio.uio_resid; ktrgenio(s, UIO_READ, ktruio, error); } #endif if (error) goto out; - td->td_retval[0] = (int)len - auio.uio_resid; + td->td_retval[0] = len - auio.uio_resid; if (mp->msg_name) { len = mp->msg_namelen; if (len <= 0 || fromsa == 0) @@ -1375,13 +1395,11 @@ recvit(td, s, mp, namelenp) return (error); } -#ifndef __rtems__ -int -recvfrom(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_recvfrom(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_recvfrom(td, uap) struct thread *td; struct recvfrom_args /* { int s; @@ -1432,7 +1450,7 @@ recvfrom(int socket, void *__restrict buffer, size_t length, int flags, int error; if (td != NULL) { - error = rtems_bsd_recvfrom(td, &ua); + error = sys_recvfrom(td, &ua); } else { error = ENOMEM; } @@ -1454,7 +1472,7 @@ orecvfrom(td, uap) { uap->flags |= MSG_COMPAT; - return (recvfrom(td, uap)); + return (sys_recvfrom(td, uap)); } #endif @@ -1521,13 +1539,11 @@ orecvmsg(td, uap) #endif #endif /* __rtems__ */ -#ifndef __rtems__ -int -recvmsg(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_recvmsg(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_recvmsg(td, uap) struct thread *td; struct recvmsg_args /* { int s; @@ -1572,7 +1588,7 @@ recvmsg(int socket, struct msghdr *message, int flags) int error; if (td != NULL) { - error = rtems_bsd_recvmsg(td, &ua); + error = sys_recvmsg(td, &ua); } else { error = ENOMEM; } @@ -1586,13 +1602,11 @@ recvmsg(int socket, struct msghdr *message, int flags) #endif /* __rtems__ */ /* ARGSUSED */ -#ifndef __rtems__ -int -shutdown(td, uap) -#else /* __rtems__ */ -static int -rtems_bsd_shutdown(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_shutdown(td, uap) struct thread *td; struct shutdown_args /* { int s; @@ -1604,7 +1618,8 @@ rtems_bsd_shutdown(td, uap) int error; AUDIT_ARG_FD(uap->s); - error = getsock(td->td_proc->p_fd, uap->s, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_SHUTDOWN, &fp, + NULL); if (error == 0) { so = fp->f_data; error = soshutdown(so, uap->how); @@ -1620,24 +1635,18 @@ shutdown(int socket, int how) .s = socket, .how = how }; - int error = rtems_bsd_shutdown(NULL, &ua); + int 
error = sys_shutdown(NULL, &ua); return rtems_bsd_error_to_status_and_errno(error); } #endif /* __rtems__ */ /* ARGSUSED */ -#ifndef __rtems__ -int -setsockopt(td, uap) -#else /* __rtems__ */ -static int -kern_setsockopt( struct thread *td, int s, int level, int name, void *val, - enum uio_seg valseg, socklen_t valsize); - -static int -rtems_bsd_setsockopt(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_setsockopt(td, uap) struct thread *td; struct setsockopt_args /* { int s; @@ -1667,7 +1676,7 @@ setsockopt(int socket, int level, int option_name, const void *option_value, int error; if (td != NULL) { - error = rtems_bsd_setsockopt(td, &ua); + error = sys_setsockopt(td, &ua); } else { error = ENOMEM; } @@ -1713,7 +1722,7 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize) } AUDIT_ARG_FD(s); - error = getsock(td->td_proc->p_fd, s, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_SETSOCKOPT, &fp, NULL); if (error == 0) { so = fp->f_data; error = sosetopt(so, &sopt); @@ -1723,17 +1732,11 @@ kern_setsockopt(td, s, level, name, val, valseg, valsize) } /* ARGSUSED */ -#ifndef __rtems__ -int -getsockopt(td, uap) -#else /* __rtems__ */ -static int -kern_getsockopt( struct thread *td, int s, int level, int name, void *val, - enum uio_seg valseg, socklen_t *valsize); - -static int -rtems_bsd_getsockopt(td, uap) +#ifdef __rtems__ +static #endif /* __rtems__ */ +int +sys_getsockopt(td, uap) struct thread *td; struct getsockopt_args /* { int s; @@ -1775,7 +1778,7 @@ getsockopt(int socket, int level, int option_name, void *__restrict int error; if (td != NULL) { - error = rtems_bsd_getsockopt(td, &ua); + error = sys_getsockopt(td, &ua); } else { error = ENOMEM; } @@ -1825,7 +1828,7 @@ kern_getsockopt(td, s, level, name, val, valseg, valsize) } AUDIT_ARG_FD(s); - error = getsock(td->td_proc->p_fd, s, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, s, CAP_GETSOCKOPT, &fp, NULL); if (error == 0) { so = fp->f_data; error = sogetopt(so, &sopt); @@ -1914,7 +1917,7 @@ kern_getsockname(struct thread *td, int fd, struct sockaddr **sa, return (EINVAL); AUDIT_ARG_FD(fd); - error = getsock(td->td_proc->p_fd, fd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETSOCKNAME, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -1944,7 +1947,7 @@ bad: #ifndef __rtems__ int -getsockname(td, uap) +sys_getsockname(td, uap) struct thread *td; struct getsockname_args *uap; { @@ -2043,7 +2046,7 @@ kern_getpeername(struct thread *td, int fd, struct sockaddr **sa, return (EINVAL); AUDIT_ARG_FD(fd); - error = getsock(td->td_proc->p_fd, fd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, fd, CAP_GETPEERNAME, &fp, NULL); if (error) return (error); so = fp->f_data; @@ -2078,7 +2081,7 @@ done: #ifndef __rtems__ int -getpeername(td, uap) +sys_getpeername(td, uap) struct thread *td; struct getpeername_args *uap; { @@ -2188,7 +2191,7 @@ sf_buf_mext(void *addr, void *args) m = sf_buf_page(args); sf_buf_free(args); - vm_page_lock_queues(); + vm_page_lock(m); vm_page_unwire(m, 0); /* * Check for the object going away on us. This can @@ -2197,7 +2200,7 @@ sf_buf_mext(void *addr, void *args) */ if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); - vm_page_unlock_queues(); + vm_page_unlock(m); if (addr == NULL) return; sfs = addr; @@ -2220,7 +2223,7 @@ sf_buf_mext(void *addr, void *args) * specified, write the total number of bytes sent into *sbytes. 
*/ int -sendfile(struct thread *td, struct sendfile_args *uap) +sys_sendfile(struct thread *td, struct sendfile_args *uap) { return (do_sendfile(td, uap, 0)); @@ -2290,9 +2293,11 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, struct mbuf *m = NULL; struct sf_buf *sf; struct vm_page *pg; + struct vattr va; off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0; int error, hdrlen = 0, mnw = 0; int vfslocked; + int bsize; struct sendfile_sync *sfs = NULL; /* @@ -2302,11 +2307,23 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * we send only the header/trailer and no payload data. */ AUDIT_ARG_FD(uap->fd); - if ((error = fgetvp_read(td, uap->fd, &vp)) != 0) + if ((error = fgetvp_read(td, uap->fd, CAP_READ, &vp)) != 0) goto out; vfslocked = VFS_LOCK_GIANT(vp->v_mount); vn_lock(vp, LK_SHARED | LK_RETRY); if (vp->v_type == VREG) { + bsize = vp->v_mount->mnt_stat.f_iosize; + if (uap->nbytes == 0) { + error = VOP_GETATTR(vp, &va, td->td_ucred); + if (error != 0) { + VOP_UNLOCK(vp, 0); + VFS_UNLOCK_GIANT(vfslocked); + obj = NULL; + goto out; + } + rem = va.va_size; + } else + rem = uap->nbytes; obj = vp->v_object; if (obj != NULL) { /* @@ -2324,7 +2341,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, obj = NULL; } } - } + } else + bsize = 0; /* silence gcc */ VOP_UNLOCK(vp, 0); VFS_UNLOCK_GIANT(vfslocked); if (obj == NULL) { @@ -2340,8 +2358,8 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The socket must be a stream socket and connected. * Remember if it a blocking or non-blocking socket. */ - if ((error = getsock(td->td_proc->p_fd, uap->s, &sock_fp, - NULL)) != 0) + if ((error = getsock_cap(td->td_proc->p_fd, uap->s, CAP_WRITE, + &sock_fp, NULL)) != 0) goto out; so = sock_fp->f_data; if (so->so_type != SOCK_STREAM) { @@ -2361,8 +2379,7 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, mnw = 1; if (uap->flags & SF_SYNC) { - sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK); - memset(sfs, 0, sizeof *sfs); + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); cv_init(&sfs->cv, "sendfile"); } @@ -2418,10 +2435,20 @@ kern_sendfile(struct thread *td, struct sendfile_args *uap, * The outer loop checks the state and available space of the socket * and takes care of the overall progress. */ - for (off = uap->offset, rem = uap->nbytes; ; ) { - int loopbytes = 0; - int space = 0; - int done = 0; + for (off = uap->offset; ; ) { + struct mbuf *mtail; + int loopbytes; + int space; + int done; + + if ((uap->nbytes != 0 && uap->nbytes == fsbytes) || + (uap->nbytes == 0 && va.va_size == fsbytes)) + break; + + mtail = NULL; + loopbytes = 0; + space = 0; + done = 0; /* * Check the socket state for ongoing connection, @@ -2489,34 +2516,42 @@ retry_space: */ space -= hdrlen; + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_lock(vp, LK_SHARED); + if (error != 0) { + VFS_UNLOCK_GIANT(vfslocked); + goto done; + } + error = VOP_GETATTR(vp, &va, td->td_ucred); + if (error != 0 || off >= va.va_size) { + VOP_UNLOCK(vp, 0); + VFS_UNLOCK_GIANT(vfslocked); + goto done; + } + VFS_UNLOCK_GIANT(vfslocked); + /* * Loop and construct maximum sized mbuf chain to be bulk * dumped into socket buffer. */ - while(space > loopbytes) { + while (space > loopbytes) { vm_pindex_t pindex; vm_offset_t pgoff; struct mbuf *m0; - VM_OBJECT_LOCK(obj); /* * Calculate the amount to transfer. * Not to exceed a page, the EOF, * or the passed in nbytes. 
*/ pgoff = (vm_offset_t)(off & PAGE_MASK); - xfsize = omin(PAGE_SIZE - pgoff, - obj->un_pager.vnp.vnp_size - uap->offset - - fsbytes - loopbytes); - if (uap->nbytes) - rem = (uap->nbytes - fsbytes - loopbytes); - else - rem = obj->un_pager.vnp.vnp_size - - uap->offset - fsbytes - loopbytes; - xfsize = omin(rem, xfsize); + rem = va.va_size - uap->offset; + if (uap->nbytes != 0) + rem = omin(rem, uap->nbytes); + rem -= fsbytes + loopbytes; + xfsize = omin(PAGE_SIZE - pgoff, rem); xfsize = omin(space - loopbytes, xfsize); if (xfsize <= 0) { - VM_OBJECT_UNLOCK(obj); done = 1; /* all data sent */ break; } @@ -2526,6 +2561,7 @@ retry_space: * if not found or wait and loop if busy. */ pindex = OFF_TO_IDX(off); + VM_OBJECT_LOCK(obj); pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY | VM_ALLOC_NORMAL | VM_ALLOC_WIRED | VM_ALLOC_RETRY); @@ -2543,45 +2579,29 @@ retry_space: else if (uap->flags & SF_NODISKIO) error = EBUSY; else { - int bsize, resid; + ssize_t resid; - /* - * Ensure that our page is still around - * when the I/O completes. - */ - vm_page_io_start(pg); VM_OBJECT_UNLOCK(obj); /* * Get the page from backing store. - */ - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - error = vn_lock(vp, LK_SHARED); - if (error != 0) - goto after_read; - bsize = vp->v_mount->mnt_stat.f_iosize; - - /* * XXXMAC: Because we don't have fp->f_cred * here, we pass in NOCRED. This is probably * wrong, but is consistent with our original * implementation. */ + vfslocked = VFS_LOCK_GIANT(vp->v_mount); error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE, trunc_page(off), UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td); - VOP_UNLOCK(vp, 0); - after_read: VFS_UNLOCK_GIANT(vfslocked); - VM_OBJECT_LOCK(obj); - vm_page_io_finish(pg); - if (!error) - VM_OBJECT_UNLOCK(obj); + if (error) + VM_OBJECT_LOCK(obj); mbstat.sf_iocnt++; } if (error) { - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); /* * See if anyone else might know about @@ -2589,11 +2609,9 @@ retry_space: * then free it. */ if (pg->wire_count == 0 && pg->valid == 0 && - pg->busy == 0 && !(pg->oflags & VPO_BUSY) && - pg->hold_count == 0) { + pg->busy == 0 && !(pg->oflags & VPO_BUSY)) vm_page_free(pg); - } - vm_page_unlock_queues(); + vm_page_unlock(pg); VM_OBJECT_UNLOCK(obj); if (error == EAGAIN) error = 0; /* not a real error */ @@ -2613,14 +2631,11 @@ retry_space: SFB_CATCH); if (sf == NULL) { mbstat.sf_allocfail++; - vm_page_lock_queues(); + vm_page_lock(pg); vm_page_unwire(pg, 0); - /* - * XXX: Not same check as above!? - */ - if (pg->wire_count == 0 && pg->object == NULL) - vm_page_free(pg); - vm_page_unlock_queues(); + KASSERT(pg->object != NULL, + ("kern_sendfile: object disappeared")); + vm_page_unlock(pg); if (m == NULL) error = (mnw ? EAGAIN : EINTR); break; @@ -2633,7 +2648,7 @@ retry_space: m0 = m_get((mnw ? M_NOWAIT : M_WAITOK), MT_DATA); if (m0 == NULL) { error = (mnw ? EAGAIN : ENOBUFS); - sf_buf_mext((void *)sf_buf_kva(sf), sf); + sf_buf_mext(NULL, sf); break; } MEXTADD(m0, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, @@ -2642,10 +2657,13 @@ retry_space: m0->m_len = xfsize; /* Append to mbuf chain. */ - if (m != NULL) - m_cat(m, m0); + if (mtail != NULL) + mtail->m_next = m0; + else if (m != NULL) + m_last(m)->m_next = m0; else m = m0; + mtail = m0; /* Keep track of bits processed. */ loopbytes += xfsize; @@ -2658,6 +2676,8 @@ retry_space: } } + VOP_UNLOCK(vp, 0); + /* Add the buffer chain to the socket buffer. 
*/ if (m != NULL) { int mlen, err; @@ -2762,7 +2782,7 @@ out: * XXX: We should make this loadable one day. */ int -sctp_peeloff(td, uap) +sys_sctp_peeloff(td, uap) struct thread *td; struct sctp_peeloff_args /* { int sd; @@ -2779,7 +2799,7 @@ sctp_peeloff(td, uap) fdp = td->td_proc->p_fd; AUDIT_ARG_FD(uap->sd); - error = fgetsock(td, uap->sd, &head, &fflag); + error = fgetsock(td, uap->sd, CAP_PEELOFF, &head, &fflag); if (error) goto done2; if (head->so_proto->pr_protocol != IPPROTO_SCTP) { @@ -2795,15 +2815,17 @@ sctp_peeloff(td, uap) * but that is ok. */ - error = falloc(td, &nfp, &fd); + error = falloc(td, &nfp, &fd, 0); if (error) goto done; td->td_retval[0] = fd; CURVNET_SET(head->so_vnet); so = sonewconn(head, SS_ISCONNECTED); - if (so == NULL) + if (so == NULL) { + error = ENOMEM; goto noconnection; + } /* * Before changing the flags on the socket, we have to bump the * reference count. Otherwise, if the protocol calls sofree(), @@ -2853,7 +2875,7 @@ done2: } int -sctp_generic_sendmsg (td, uap) +sys_sctp_generic_sendmsg (td, uap) struct thread *td; struct sctp_generic_sendmsg_args /* { int sd, @@ -2876,6 +2898,7 @@ sctp_generic_sendmsg (td, uap) #endif struct uio auio; struct iovec iov[1]; + cap_rights_t rights; if (uap->sinfo) { error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); @@ -2883,16 +2906,19 @@ sctp_generic_sendmsg (td, uap) return (error); u_sinfo = &sinfo; } + + rights = CAP_WRITE; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { to = NULL; goto sctp_bad2; } + rights |= CAP_CONNECT; } AUDIT_ARG_FD(uap->sd); - error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); if (error) goto sctp_bad; #ifdef KTRACE @@ -2935,7 +2961,7 @@ sctp_generic_sendmsg (td, uap) if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && !(uap->flags & MSG_NOSIGNAL)) { PROC_LOCK(td->td_proc); - tdksignal(td, SIGPIPE, NULL); + tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } @@ -2960,7 +2986,7 @@ sctp_bad2: } int -sctp_generic_sendmsg_iov(td, uap) +sys_sctp_generic_sendmsg_iov(td, uap) struct thread *td; struct sctp_generic_sendmsg_iov_args /* { int sd, @@ -2976,13 +3002,15 @@ sctp_generic_sendmsg_iov(td, uap) struct sctp_sndrcvinfo sinfo, *u_sinfo = NULL; struct socket *so; struct file *fp = NULL; - int error=0, len, i; + int error=0, i; + ssize_t len; struct sockaddr *to = NULL; #ifdef KTRACE struct uio *ktruio = NULL; #endif struct uio auio; struct iovec *iov, *tiov; + cap_rights_t rights; if (uap->sinfo) { error = copyin(uap->sinfo, &sinfo, sizeof (sinfo)); @@ -2990,16 +3018,18 @@ sctp_generic_sendmsg_iov(td, uap) return (error); u_sinfo = &sinfo; } + rights = CAP_WRITE; if (uap->tolen) { error = getsockaddr(&to, uap->to, uap->tolen); if (error) { to = NULL; goto sctp_bad2; } + rights |= CAP_CONNECT; } AUDIT_ARG_FD(uap->sd); - error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, rights, &fp, NULL); if (error) goto sctp_bad1; @@ -3056,7 +3086,7 @@ sctp_generic_sendmsg_iov(td, uap) if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) && !(uap->flags & MSG_NOSIGNAL)) { PROC_LOCK(td->td_proc); - tdksignal(td, SIGPIPE, NULL); + tdsignal(td, SIGPIPE); PROC_UNLOCK(td->td_proc); } } @@ -3083,7 +3113,7 @@ sctp_bad2: } int -sctp_generic_recvmsg(td, uap) +sys_sctp_generic_recvmsg(td, uap) struct thread *td; struct sctp_generic_recvmsg_args /* { int sd, @@ -3096,7 +3126,7 @@ sctp_generic_recvmsg(td, uap) } */ *uap; { #if (defined(INET) || 
defined(INET6)) && defined(SCTP) - u_int8_t sockbufstore[256]; + uint8_t sockbufstore[256]; struct uio auio; struct iovec *iov, *tiov; struct sctp_sndrcvinfo sinfo; @@ -3104,14 +3134,15 @@ sctp_generic_recvmsg(td, uap) struct file *fp = NULL; struct sockaddr *fromsa; int fromlen; - int len, i, msg_flags; + ssize_t len; + int i, msg_flags; int error = 0; #ifdef KTRACE struct uio *ktruio = NULL; #endif AUDIT_ARG_FD(uap->sd); - error = getsock(td->td_proc->p_fd, uap->sd, &fp, NULL); + error = getsock_cap(td->td_proc->p_fd, uap->sd, CAP_READ, &fp, NULL); if (error) { return (error); } @@ -3182,7 +3213,7 @@ sctp_generic_recvmsg(td, uap) (struct sctp_sndrcvinfo *)&sinfo, 1); CURVNET_RESTORE(); if (error) { - if (auio.uio_resid != (int)len && (error == ERESTART || + if (auio.uio_resid != len && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; } else { @@ -3191,13 +3222,13 @@ sctp_generic_recvmsg(td, uap) } #ifdef KTRACE if (ktruio != NULL) { - ktruio->uio_resid = (int)len - auio.uio_resid; + ktruio->uio_resid = len - auio.uio_resid; ktrgenio(uap->sd, UIO_READ, ktruio, error); } #endif /* KTRACE */ if (error) goto out; - td->td_retval[0] = (int)len - auio.uio_resid; + td->td_retval[0] = len - auio.uio_resid; if (fromlen && uap->from) { len = fromlen; @@ -3205,7 +3236,7 @@ sctp_generic_recvmsg(td, uap) len = 0; else { len = MIN(len, fromsa->sa_len); - error = copyout(fromsa, uap->from, (unsigned)len); + error = copyout(fromsa, uap->from, (size_t)len); if (error) goto out; }
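
The common thread in the hunks above is Capsicum awareness: every socket syscall now resolves its descriptor through getsock_cap() and states up front which capability rights it needs, and the send paths add CAP_CONNECT to CAP_WRITE only when an explicit destination address is supplied (see kern_sendit() and the SCTP send wrappers). The following sketch is illustrative only; the function name sendto_rights_example() and its surrounding structure are hypothetical, but the rights mask is composed with the same getsock_cap(), fdrop() and CAP_* interfaces that appear in this diff, as if the helper lived alongside the other static routines in uipc_syscalls.c.

/*
 * Illustrative sketch (not part of the diff): resolve a socket
 * descriptor the way the updated send paths do, requesting
 * CAP_CONNECT only when a destination address is present.
 */
#include <rtems/bsd/sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
#include <sys/proc.h>
#include <sys/file.h>
#include <sys/socketvar.h>

static int
sendto_rights_example(struct thread *td, int s, struct sockaddr *to)
{
	struct file *fp;
	struct socket *so;
	cap_rights_t rights;
	int error;

	/* Sending always requires write access to the socket. */
	rights = CAP_WRITE;
	/* An explicit destination implies an implicit connect. */
	if (to != NULL)
		rights |= CAP_CONNECT;

	/* Fails with EBADF, ENOTSOCK or a capability error as appropriate. */
	error = getsock_cap(td->td_proc->p_fd, s, rights, &fp, NULL);
	if (error != 0)
		return (error);

	so = fp->f_data;
	/* ... hand 'so' to sosend() or a similar consumer here ... */
	(void)so;

	fdrop(fp, td);
	return (0);
}

Note that in this tree the #else /* __rtems__ */ branch maps getsock_cap() onto rtems_bsd_getsock() with a macro, so the rights argument is accepted for source compatibility but not enforced; the rights check only takes effect when the FreeBSD code is built with CAPABILITIES via opt_capsicum.h.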