diff options
Diffstat (limited to 'freebsd/sys/kern')
-rw-r--r-- | freebsd/sys/kern/init_main.c | 16 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_event.c | 131 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_linker.c | 197 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_mbuf.c | 137 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_mib.c | 7 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_time.c | 242 | ||||
-rw-r--r-- | freebsd/sys/kern/kern_timeout.c | 16 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_lock.c | 4 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_rman.c | 31 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_sbuf.c | 5 | ||||
-rw-r--r-- | freebsd/sys/kern/subr_taskqueue.c | 36 | ||||
-rw-r--r-- | freebsd/sys/kern/sys_generic.c | 4 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_sockbuf.c | 78 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_socket.c | 59 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_syscalls.c | 61 | ||||
-rw-r--r-- | freebsd/sys/kern/uipc_usrreq.c | 19 |
16 files changed, 701 insertions, 342 deletions
diff --git a/freebsd/sys/kern/init_main.c b/freebsd/sys/kern/init_main.c index 88f5f685..f8d04f2c 100644 --- a/freebsd/sys/kern/init_main.c +++ b/freebsd/sys/kern/init_main.c @@ -250,9 +250,6 @@ restart: /* * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. - * - * The last item on the list is expected to be the scheduler, - * which will not return. */ for (sipp = sysinit; sipp < sysinit_end; sipp++) { @@ -312,8 +309,14 @@ restart: #endif /* __rtems__ */ } + mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED); + mtx_unlock(&Giant); + #ifndef __rtems__ - panic("Shouldn't get here!"); + /* + * Now hand over this thread to swapper. + */ + swapper(); /* NOTREACHED*/ #endif /* __rtems__ */ } @@ -358,7 +361,7 @@ static char wit_warn[] = "WARNING: WITNESS option enabled, expect reduced performance.\n"; SYSINIT(witwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); -SYSINIT(witwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 1, +SYSINIT(witwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 1, print_caddr_t, wit_warn); #endif @@ -367,7 +370,7 @@ static char diag_warn[] = "WARNING: DIAGNOSTIC option enabled, expect reduced performance.\n"; SYSINIT(diagwarn, SI_SUB_COPYRIGHT, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); -SYSINIT(diagwarn2, SI_SUB_RUN_SCHEDULER, SI_ORDER_THIRD + 2, +SYSINIT(diagwarn2, SI_SUB_LAST, SI_ORDER_THIRD + 2, print_caddr_t, diag_warn); #endif @@ -494,6 +497,7 @@ proc0_init(void *dummy __unused) p->p_sysent = &null_sysvec; p->p_flag = P_SYSTEM | P_INMEM; + p->p_flag2 = 0; p->p_state = PRS_NORMAL; knlist_init_mtx(&p->p_klist, &p->p_mtx); STAILQ_INIT(&p->p_ktr); diff --git a/freebsd/sys/kern/kern_event.c b/freebsd/sys/kern/kern_event.c index 69c47246..89cd1765 100644 --- a/freebsd/sys/kern/kern_event.c +++ b/freebsd/sys/kern/kern_event.c @@ -464,8 +464,11 @@ filt_proc(struct knote *kn, long hint) if (!(kn->kn_status & KN_DETACHED)) knlist_remove_inevent(&p->p_klist, kn); kn->kn_flags |= (EV_EOF | EV_ONESHOT); - kn->kn_data = p->p_xstat; kn->kn_ptr.p_proc = NULL; + if (kn->kn_fflags & NOTE_EXIT) + kn->kn_data = p->p_xstat; + if (kn->kn_fflags == 0) + kn->kn_flags |= EV_DROP; return (1); } @@ -497,7 +500,7 @@ knote_fork(struct knlist *list, int pid) continue; kq = kn->kn_kq; KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { + if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { KQ_UNLOCK(kq); continue; } @@ -507,7 +510,7 @@ knote_fork(struct knlist *list, int pid) */ if ((kn->kn_sfflags & NOTE_TRACK) == 0) { kn->kn_status |= KN_HASKQLOCK; - if (kn->kn_fop->f_event(kn, NOTE_FORK | pid)) + if (kn->kn_fop->f_event(kn, NOTE_FORK)) KNOTE_ACTIVATE(kn, 1); kn->kn_status &= ~KN_HASKQLOCK; KQ_UNLOCK(kq); @@ -535,10 +538,10 @@ knote_fork(struct knlist *list, int pid) kev.data = kn->kn_id; /* parent */ kev.udata = kn->kn_kevent.udata;/* preserve udata */ error = kqueue_register(kq, &kev, NULL, 0); - if (kn->kn_fop->f_event(kn, NOTE_FORK | pid)) - KNOTE_ACTIVATE(kn, 0); if (error) kn->kn_fflags |= NOTE_TRACKERR; + if (kn->kn_fop->f_event(kn, NOTE_FORK)) + KNOTE_ACTIVATE(kn, 0); KQ_LOCK(kq); kn->kn_status &= ~KN_INFLUX; KQ_UNLOCK_FLUX(kq); @@ -753,11 +756,11 @@ sys_kqueue(struct thread *td, struct kqueue_args *uap) #ifndef __rtems__ FILEDESC_XLOCK(fdp); - SLIST_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); + TAILQ_INSERT_HEAD(&fdp->fd_kqlist, kq, kq_list); FILEDESC_XUNLOCK(fdp); #else /* __rtems__ */ rtems_libio_lock(); - SLIST_INSERT_HEAD(&fd_kqlist, kq, kq_list); + TAILQ_INSERT_HEAD(&fd_kqlist, kq, kq_list); rtems_libio_unlock(); #endif /* __rtems__ */ @@ -1076,12 +1079,13 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td, int wa struct file *fp; struct knote *kn, *tkn; int error, filt, event; - int haskqglobal; + int haskqglobal, filedesc_unlock; fp = NULL; kn = NULL; error = 0; haskqglobal = 0; + filedesc_unlock = 0; filt = kev->filter; fops = kqueue_fo_find(filt); @@ -1125,6 +1129,13 @@ findkn: goto done; } + /* + * Pre-lock the filedesc before the global + * lock mutex, see the comment in + * kqueue_close(). + */ + FILEDESC_XLOCK(td->td_proc->p_fd); + filedesc_unlock = 1; KQ_GLOBAL_LOCK(&kq_global, haskqglobal); } @@ -1154,6 +1165,10 @@ findkn: /* knote is in the process of changing, wait for it to stablize. */ if (kn != NULL && (kn->kn_status & KN_INFLUX) == KN_INFLUX) { KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); + if (filedesc_unlock) { + FILEDESC_XUNLOCK(td->td_proc->p_fd); + filedesc_unlock = 0; + } kq->kq_state |= KQ_FLUXWAIT; msleep(kq, &kq->kq_lock, PSOCK | PDROP, "kqflxwt", 0); if (fp != NULL) { @@ -1229,7 +1244,7 @@ findkn: * but doing so will not reset any filter which has already been * triggered. */ - kn->kn_status |= KN_INFLUX; + kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); KN_LIST_LOCK(kn); kn->kn_kevent.udata = kev->udata; @@ -1252,7 +1267,7 @@ done_ev_add: KQ_LOCK(kq); if (event) KNOTE_ACTIVATE(kn, 1); - kn->kn_status &= ~KN_INFLUX; + kn->kn_status &= ~(KN_INFLUX | KN_SCAN); KN_LIST_UNLOCK(kn); if ((kev->flags & EV_DISABLE) && @@ -1270,6 +1285,8 @@ done_ev_add: done: KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); + if (filedesc_unlock) + FILEDESC_XUNLOCK(td->td_proc->p_fd); if (fp != NULL) fdrop(fp, td); if (tkn != NULL) @@ -1541,7 +1558,21 @@ start: KASSERT((kn->kn_status & KN_INFLUX) == 0, ("KN_INFLUX set when not suppose to be")); - if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { + if ((kn->kn_flags & EV_DROP) == EV_DROP) { + kn->kn_status &= ~KN_QUEUED; + kn->kn_status |= KN_INFLUX; + kq->kq_count--; + KQ_UNLOCK(kq); + /* + * We don't need to lock the list since we've marked + * it _INFLUX. + */ + if (!(kn->kn_status & KN_DETACHED)) + kn->kn_fop->f_detach(kn); + knote_drop(kn, td); + KQ_LOCK(kq); + continue; + } else if ((kn->kn_flags & EV_ONESHOT) == EV_ONESHOT) { kn->kn_status &= ~KN_QUEUED; kn->kn_status |= KN_INFLUX; kq->kq_count--; @@ -1557,7 +1588,7 @@ start: KQ_LOCK(kq); kn = NULL; } else { - kn->kn_status |= KN_INFLUX; + kn->kn_status |= KN_INFLUX | KN_SCAN; KQ_UNLOCK(kq); if ((kn->kn_status & KN_KQUEUE) == KN_KQUEUE) KQ_GLOBAL_LOCK(&kq_global, haskqglobal); @@ -1566,7 +1597,8 @@ start: KQ_LOCK(kq); KQ_GLOBAL_UNLOCK(&kq_global, haskqglobal); kn->kn_status &= - ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX); + ~(KN_QUEUED | KN_ACTIVE | KN_INFLUX | + KN_SCAN); kq->kq_count--; KN_LIST_UNLOCK(kn); influx = 1; @@ -1596,7 +1628,7 @@ start: } else TAILQ_INSERT_TAIL(&kq->kq_head, kn, kn_tqe); - kn->kn_status &= ~(KN_INFLUX); + kn->kn_status &= ~(KN_INFLUX | KN_SCAN); KN_LIST_UNLOCK(kn); influx = 1; } @@ -1788,6 +1820,7 @@ kqueue_close(struct file *fp, struct thread *td) struct knote *kn; int i; int error; + int filedesc_unlock; #ifdef __rtems__ /* FIXME: Move this to the RTEMS close() function */ @@ -1797,6 +1830,7 @@ kqueue_close(struct file *fp, struct thread *td) if ((error = kqueue_acquire(fp, &kq))) return error; + filedesc_unlock = 0; KQ_LOCK(kq); KASSERT((kq->kq_state & KQ_CLOSING) != KQ_CLOSING, @@ -1863,12 +1897,24 @@ kqueue_close(struct file *fp, struct thread *td) KQ_UNLOCK(kq); #ifndef __rtems__ - FILEDESC_XLOCK(fdp); - SLIST_REMOVE(&fdp->fd_kqlist, kq, kqueue, kq_list); - FILEDESC_XUNLOCK(fdp); + /* + * We could be called due to the knote_drop() doing fdrop(), + * called from kqueue_register(). In this case the global + * lock is owned, and filedesc sx is locked before, to not + * take the sleepable lock after non-sleepable. + */ + if (!sx_xlocked(FILEDESC_LOCK(fdp))) { + FILEDESC_XLOCK(fdp); + filedesc_unlock = 1; + } else + filedesc_unlock = 0; + TAILQ_REMOVE(&fdp->fd_kqlist, kq, kq_list); + if (filedesc_unlock) + FILEDESC_XUNLOCK(fdp); #else /* __rtems__ */ + (void)filedesc_unlock; rtems_libio_lock(); - SLIST_REMOVE(&fd_kqlist, kq, kqueue, kq_list); + TAILQ_REMOVE(&fd_kqlist, kq, kq_list); rtems_libio_unlock(); #endif /* __rtems__ */ @@ -1966,28 +2012,33 @@ knote(struct knlist *list, long hint, int lockflags) */ SLIST_FOREACH(kn, &list->kl_list, kn_selnext) { kq = kn->kn_kq; - if ((kn->kn_status & KN_INFLUX) != KN_INFLUX) { + KQ_LOCK(kq); + if ((kn->kn_status & (KN_INFLUX | KN_SCAN)) == KN_INFLUX) { + /* + * Do not process the influx notes, except for + * the influx coming from the kq unlock in the + * kqueue_scan(). In the later case, we do + * not interfere with the scan, since the code + * fragment in kqueue_scan() locks the knlist, + * and cannot proceed until we finished. + */ + KQ_UNLOCK(kq); + } else if ((lockflags & KNF_NOKQLOCK) != 0) { + kn->kn_status |= KN_INFLUX; + KQ_UNLOCK(kq); + error = kn->kn_fop->f_event(kn, hint); KQ_LOCK(kq); - if ((kn->kn_status & KN_INFLUX) == KN_INFLUX) { - KQ_UNLOCK(kq); - } else if ((lockflags & KNF_NOKQLOCK) != 0) { - kn->kn_status |= KN_INFLUX; - KQ_UNLOCK(kq); - error = kn->kn_fop->f_event(kn, hint); - KQ_LOCK(kq); - kn->kn_status &= ~KN_INFLUX; - if (error) - KNOTE_ACTIVATE(kn, 1); - KQ_UNLOCK_FLUX(kq); - } else { - kn->kn_status |= KN_HASKQLOCK; - if (kn->kn_fop->f_event(kn, hint)) - KNOTE_ACTIVATE(kn, 1); - kn->kn_status &= ~KN_HASKQLOCK; - KQ_UNLOCK(kq); - } + kn->kn_status &= ~KN_INFLUX; + if (error) + KNOTE_ACTIVATE(kn, 1); + KQ_UNLOCK_FLUX(kq); + } else { + kn->kn_status |= KN_HASKQLOCK; + if (kn->kn_fop->f_event(kn, hint)) + KNOTE_ACTIVATE(kn, 1); + kn->kn_status &= ~KN_HASKQLOCK; + KQ_UNLOCK(kq); } - kq = NULL; } if ((lockflags & KNF_LISTLOCKED) == 0) list->kl_unlock(list->kl_lockarg); @@ -2231,11 +2282,11 @@ knote_fdclose(struct thread *td, int fd) * since filedesc is locked. */ #ifndef __rtems__ - SLIST_FOREACH(kq, &fdp->fd_kqlist, kq_list) { + TAILQ_FOREACH(kq, &fdp->fd_kqlist, kq_list) { #else /* __rtems__ */ /* FIXME: Use separate lock? */ rtems_libio_lock(); - SLIST_FOREACH(kq, &fd_kqlist, kq_list) { + TAILQ_FOREACH(kq, &fd_kqlist, kq_list) { #endif /* __rtems__ */ KQ_LOCK(kq); diff --git a/freebsd/sys/kern/kern_linker.c b/freebsd/sys/kern/kern_linker.c index b1b46d7a..39664a85 100644 --- a/freebsd/sys/kern/kern_linker.c +++ b/freebsd/sys/kern/kern_linker.c @@ -46,6 +46,7 @@ __FBSDID("$FreeBSD$"); #include <sys/module.h> #include <sys/mount.h> #include <sys/linker.h> +#include <sys/eventhandler.h> #include <sys/fcntl.h> #include <sys/jail.h> #include <sys/libkern.h> @@ -71,17 +72,6 @@ SYSCTL_INT(_debug, OID_AUTO, kld_debug, CTLFLAG_RW, &kld_debug, 0, "Set various levels of KLD debug"); #endif -#define KLD_LOCK() sx_xlock(&kld_sx) -#define KLD_UNLOCK() sx_xunlock(&kld_sx) -#define KLD_DOWNGRADE() sx_downgrade(&kld_sx) -#define KLD_LOCK_READ() sx_slock(&kld_sx) -#define KLD_UNLOCK_READ() sx_sunlock(&kld_sx) -#define KLD_LOCKED() sx_xlocked(&kld_sx) -#define KLD_LOCK_ASSERT() do { \ - if (!cold) \ - sx_assert(&kld_sx, SX_XLOCKED); \ -} while (0) - /* * static char *linker_search_path(const char *name, struct mod_depend * *verinfo); @@ -123,7 +113,8 @@ static int linker_no_more_classes = 0; #define LINKER_GET_NEXT_FILE_ID(a) do { \ linker_file_t lftmp; \ \ - KLD_LOCK_ASSERT(); \ + if (!cold) \ + sx_assert(&kld_sx, SA_XLOCKED); \ retry: \ TAILQ_FOREACH(lftmp, &linker_files, link) { \ if (next_file_id == lftmp->id) { \ @@ -210,6 +201,8 @@ linker_file_sysinit(linker_file_t lf) KLD_DPF(FILE, ("linker_file_sysinit: calling SYSINITs for %s\n", lf->filename)); + sx_assert(&kld_sx, SA_XLOCKED); + if (linker_file_lookup_set(lf, "sysinit_set", &start, &stop, NULL) != 0) return; /* @@ -235,6 +228,7 @@ linker_file_sysinit(linker_file_t lf) * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ + sx_xunlock(&kld_sx); mtx_lock(&Giant); for (sipp = start; sipp < stop; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) @@ -244,6 +238,7 @@ linker_file_sysinit(linker_file_t lf) (*((*sipp)->func)) ((*sipp)->udata); } mtx_unlock(&Giant); + sx_xlock(&kld_sx); } static void @@ -254,6 +249,8 @@ linker_file_sysuninit(linker_file_t lf) KLD_DPF(FILE, ("linker_file_sysuninit: calling SYSUNINITs for %s\n", lf->filename)); + sx_assert(&kld_sx, SA_XLOCKED); + if (linker_file_lookup_set(lf, "sysuninit_set", &start, &stop, NULL) != 0) return; @@ -281,6 +278,7 @@ linker_file_sysuninit(linker_file_t lf) * Traverse the (now) ordered list of system initialization tasks. * Perform each task, and continue on to the next task. */ + sx_xunlock(&kld_sx); mtx_lock(&Giant); for (sipp = start; sipp < stop; sipp++) { if ((*sipp)->subsystem == SI_SUB_DUMMY) @@ -290,6 +288,7 @@ linker_file_sysuninit(linker_file_t lf) (*((*sipp)->func)) ((*sipp)->udata); } mtx_unlock(&Giant); + sx_xlock(&kld_sx); } static void @@ -301,13 +300,17 @@ linker_file_register_sysctls(linker_file_t lf) ("linker_file_register_sysctls: registering SYSCTLs for %s\n", lf->filename)); + sx_assert(&kld_sx, SA_XLOCKED); + if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0) return; + sx_xunlock(&kld_sx); sysctl_lock(); for (oidp = start; oidp < stop; oidp++) sysctl_register_oid(*oidp); sysctl_unlock(); + sx_xlock(&kld_sx); } static void @@ -318,13 +321,17 @@ linker_file_unregister_sysctls(linker_file_t lf) KLD_DPF(FILE, ("linker_file_unregister_sysctls: registering SYSCTLs" " for %s\n", lf->filename)); + sx_assert(&kld_sx, SA_XLOCKED); + if (linker_file_lookup_set(lf, "sysctl_set", &start, &stop, NULL) != 0) return; + sx_xunlock(&kld_sx); sysctl_lock(); for (oidp = start; oidp < stop; oidp++) sysctl_unregister_oid(*oidp); sysctl_unlock(); + sx_xlock(&kld_sx); } #endif /* __rtems__ */ @@ -339,6 +346,8 @@ linker_file_register_modules(linker_file_t lf) " in %s\n", lf->filename)); #ifndef __rtems__ + sx_assert(&kld_sx, SA_XLOCKED); + if (linker_file_lookup_set(lf, "modmetadata_set", &start, &stop, NULL) != 0) { /* @@ -379,7 +388,9 @@ linker_init_kernel_modules(void) { #ifndef __rtems__ + sx_xlock(&kld_sx); linker_file_register_modules(linker_kernel_file); + sx_xunlock(&kld_sx); #else /* __rtems__ */ linker_file_register_modules(NULL); #endif /* __rtems__ */ @@ -400,7 +411,7 @@ linker_load_file(const char *filename, linker_file_t *result) if (prison0.pr_securelevel > 0) return (EPERM); - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); lf = linker_find_file_by_name(filename); if (lf) { KLD_DPF(FILE, ("linker_load_file: file %s is already loaded," @@ -434,10 +445,8 @@ linker_load_file(const char *filename, linker_file_t *result) return (error); } modules = !TAILQ_EMPTY(&lf->modules); - KLD_UNLOCK(); linker_file_register_sysctls(lf); linker_file_sysinit(lf); - KLD_LOCK(); lf->flags |= LINKER_FILE_LINKED; /* @@ -449,6 +458,7 @@ linker_load_file(const char *filename, linker_file_t *result) linker_file_unload(lf, LINKER_UNLOAD_FORCE); return (ENOEXEC); } + EVENTHANDLER_INVOKE(kld_load, lf); *result = lf; return (0); } @@ -488,16 +498,16 @@ linker_reference_module(const char *modname, struct mod_depend *verinfo, modlist_t mod; int error; - KLD_LOCK(); + sx_xlock(&kld_sx); if ((mod = modlist_lookup2(modname, verinfo)) != NULL) { *result = mod->container; (*result)->refs++; - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (0); } error = linker_load_module(NULL, modname, NULL, verinfo, result); - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } @@ -508,13 +518,13 @@ linker_release_module(const char *modname, struct mod_depend *verinfo, modlist_t mod; int error; - KLD_LOCK(); + sx_xlock(&kld_sx); if (lf == NULL) { KASSERT(modname != NULL, ("linker_release_module: no file or name")); mod = modlist_lookup2(modname, verinfo); if (mod == NULL) { - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (ESRCH); } lf = mod->container; @@ -522,7 +532,7 @@ linker_release_module(const char *modname, struct mod_depend *verinfo, KASSERT(modname == NULL && verinfo == NULL, ("linker_release_module: both file and name")); error = linker_file_unload(lf, LINKER_UNLOAD_NORMAL); - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } @@ -535,7 +545,7 @@ linker_find_file_by_name(const char *filename) koname = malloc(strlen(filename) + 4, M_LINKER, M_WAITOK); sprintf(koname, "%s.ko", filename); - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); TAILQ_FOREACH(lf, &linker_files, link) { if (strcmp(lf->filename, koname) == 0) break; @@ -551,7 +561,7 @@ linker_find_file_by_id(int fileid) { linker_file_t lf; - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); TAILQ_FOREACH(lf, &linker_files, link) if (lf->id == fileid && lf->flags & LINKER_FILE_LINKED) break; @@ -564,13 +574,13 @@ linker_file_foreach(linker_predicate_t *predicate, void *context) linker_file_t lf; int retval = 0; - KLD_LOCK(); + sx_xlock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) { retval = predicate(lf, context); if (retval != 0) break; } - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (retval); } @@ -580,7 +590,8 @@ linker_make_file(const char *pathname, linker_class_t lc) linker_file_t lf; const char *filename; - KLD_LOCK_ASSERT(); + if (!cold) + sx_assert(&kld_sx, SA_XLOCKED); filename = linker_basename(pathname); KLD_DPF(FILE, ("linker_make_file: new file, filename='%s' for pathname='%s'\n", filename, pathname)); @@ -596,8 +607,6 @@ linker_make_file(const char *pathname, linker_class_t lc) lf->ndeps = 0; lf->deps = NULL; lf->loadcnt = ++loadcnt; - lf->sdt_probes = NULL; - lf->sdt_nprobes = 0; STAILQ_INIT(&lf->common); TAILQ_INIT(&lf->modules); TAILQ_INSERT_TAIL(&linker_files, lf, link); @@ -616,7 +625,7 @@ linker_file_unload(linker_file_t file, int flags) if (prison0.pr_securelevel > 0) return (EPERM); - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); KLD_DPF(FILE, ("linker_file_unload: lf->refs=%d\n", file->refs)); /* Easy case of just dropping a reference. */ @@ -625,6 +634,12 @@ linker_file_unload(linker_file_t file, int flags) return (0); } + /* Give eventhandlers a chance to prevent the unload. */ + error = 0; + EVENTHANDLER_INVOKE(kld_unload_try, file, &error); + if (error != 0) + return (EBUSY); + KLD_DPF(FILE, ("linker_file_unload: file is unloading," " informing modules\n")); @@ -689,10 +704,8 @@ linker_file_unload(linker_file_t file, int flags) */ if (file->flags & LINKER_FILE_LINKED) { file->flags &= ~LINKER_FILE_LINKED; - KLD_UNLOCK(); linker_file_sysuninit(file); linker_file_unregister_sysctls(file); - KLD_LOCK(); } TAILQ_REMOVE(&linker_files, file, link); @@ -708,6 +721,10 @@ linker_file_unload(linker_file_t file, int flags) } LINKER_UNLOAD(file); + + EVENTHANDLER_INVOKE(kld_unload, file->filename, file->address, + file->size); + if (file->filename) { free(file->filename, M_LINKER); file->filename = NULL; @@ -731,18 +748,9 @@ linker_file_add_dependency(linker_file_t file, linker_file_t dep) { linker_file_t *newdeps; - KLD_LOCK_ASSERT(); - newdeps = malloc((file->ndeps + 1) * sizeof(linker_file_t *), + sx_assert(&kld_sx, SA_XLOCKED); + file->deps = realloc(file->deps, (file->ndeps + 1) * sizeof(*newdeps), M_LINKER, M_WAITOK | M_ZERO); - if (newdeps == NULL) - return (ENOMEM); - - if (file->deps) { - bcopy(file->deps, newdeps, - file->ndeps * sizeof(linker_file_t *)); - free(file->deps, M_LINKER); - } - file->deps = newdeps; file->deps[file->ndeps] = dep; file->ndeps++; KLD_DPF(FILE, ("linker_file_add_dependency:" @@ -761,15 +769,9 @@ int linker_file_lookup_set(linker_file_t file, const char *name, void *firstp, void *lastp, int *countp) { - int error, locked; - locked = KLD_LOCKED(); - if (!locked) - KLD_LOCK(); - error = LINKER_LOOKUP_SET(file, name, firstp, lastp, countp); - if (!locked) - KLD_UNLOCK(); - return (error); + sx_assert(&kld_sx, SA_LOCKED); + return (LINKER_LOOKUP_SET(file, name, firstp, lastp, countp)); } /* @@ -788,12 +790,12 @@ linker_file_lookup_symbol(linker_file_t file, const char *name, int deps) caddr_t sym; int locked; - locked = KLD_LOCKED(); + locked = sx_xlocked(&kld_sx); if (!locked) - KLD_LOCK(); + sx_xlock(&kld_sx); sym = linker_file_lookup_symbol_internal(file, name, deps); if (!locked) - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (sym); } @@ -807,7 +809,7 @@ linker_file_lookup_symbol_internal(linker_file_t file, const char *name, size_t common_size = 0; int i; - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); KLD_DPF(SYM, ("linker_file_lookup_symbol: file=%p, name=%s, deps=%d\n", file, name, deps)); @@ -1007,9 +1009,9 @@ linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, { int error; - KLD_LOCK(); + sx_xlock(&kld_sx); error = linker_debug_search_symbol_name(value, buf, buflen, offset); - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } @@ -1019,9 +1021,6 @@ linker_search_symbol_name(caddr_t value, char *buf, u_int buflen, int kern_kldload(struct thread *td, const char *file, int *fileid) { -#ifdef HWPMC_HOOKS - struct pmckern_map_in pkm; -#endif const char *kldname, *modname; linker_file_t lf; int error; @@ -1051,24 +1050,16 @@ kern_kldload(struct thread *td, const char *file, int *fileid) modname = file; } - KLD_LOCK(); + sx_xlock(&kld_sx); error = linker_load_module(kldname, modname, NULL, NULL, &lf); if (error) { - KLD_UNLOCK(); + sx_xunlock(&kld_sx); goto done; } lf->userrefs++; if (fileid != NULL) *fileid = lf->id; -#ifdef HWPMC_HOOKS - KLD_DOWNGRADE(); - pkm.pm_file = lf->filename; - pkm.pm_address = (uintptr_t) lf->address; - PMC_CALL_HOOK(td, PMC_FN_KLD_LOAD, (void *) &pkm); - KLD_UNLOCK_READ(); -#else - KLD_UNLOCK(); -#endif + sx_xunlock(&kld_sx); done: CURVNET_RESTORE(); @@ -1097,9 +1088,6 @@ sys_kldload(struct thread *td, struct kldload_args *uap) int kern_kldunload(struct thread *td, int fileid, int flags) { -#ifdef HWPMC_HOOKS - struct pmckern_map_out pkm; -#endif linker_file_t lf; int error = 0; @@ -1110,17 +1098,12 @@ kern_kldunload(struct thread *td, int fileid, int flags) return (error); CURVNET_SET(TD_TO_VNET(td)); - KLD_LOCK(); + sx_xlock(&kld_sx); lf = linker_find_file_by_id(fileid); if (lf) { KLD_DPF(FILE, ("kldunload: lf->userrefs=%d\n", lf->userrefs)); - /* Check if there are DTrace probes enabled on this file. */ - if (lf->nenabled > 0) { - printf("kldunload: attempt to unload file that has" - " DTrace probes enabled\n"); - error = EBUSY; - } else if (lf->userrefs == 0) { + if (lf->userrefs == 0) { /* * XXX: maybe LINKER_UNLOAD_FORCE should override ? */ @@ -1128,11 +1111,6 @@ kern_kldunload(struct thread *td, int fileid, int flags) " loaded by the kernel\n"); error = EBUSY; } else { -#ifdef HWPMC_HOOKS - /* Save data needed by hwpmc(4) before unloading. */ - pkm.pm_address = (uintptr_t) lf->address; - pkm.pm_size = lf->size; -#endif lf->userrefs--; error = linker_file_unload(lf, flags); if (error) @@ -1140,17 +1118,8 @@ kern_kldunload(struct thread *td, int fileid, int flags) } } else error = ENOENT; + sx_xunlock(&kld_sx); -#ifdef HWPMC_HOOKS - if (error == 0) { - KLD_DOWNGRADE(); - PMC_CALL_HOOK(td, PMC_FN_KLD_UNLOAD, (void *) &pkm); - KLD_UNLOCK_READ(); - } else - KLD_UNLOCK(); -#else - KLD_UNLOCK(); -#endif CURVNET_RESTORE(); return (error); } @@ -1193,13 +1162,13 @@ sys_kldfind(struct thread *td, struct kldfind_args *uap) goto out; filename = linker_basename(pathname); - KLD_LOCK(); + sx_xlock(&kld_sx); lf = linker_find_file_by_name(filename); if (lf) td->td_retval[0] = lf->id; else error = ENOENT; - KLD_UNLOCK(); + sx_xunlock(&kld_sx); out: free(pathname, M_TEMP); return (error); @@ -1217,7 +1186,7 @@ sys_kldnext(struct thread *td, struct kldnext_args *uap) return (error); #endif - KLD_LOCK(); + sx_xlock(&kld_sx); if (uap->fileid == 0) lf = TAILQ_FIRST(&linker_files); else { @@ -1238,7 +1207,7 @@ sys_kldnext(struct thread *td, struct kldnext_args *uap) else td->td_retval[0] = 0; out: - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } @@ -1277,10 +1246,10 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) return (error); #endif - KLD_LOCK(); + sx_xlock(&kld_sx); lf = linker_find_file_by_id(fileid); if (lf == NULL) { - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (ENOENT); } @@ -1298,7 +1267,7 @@ kern_kldstat(struct thread *td, int fileid, struct kld_file_stat *stat) if (namelen > MAXPATHLEN) namelen = MAXPATHLEN; bcopy(lf->pathname, &stat->pathname[0], namelen); - KLD_UNLOCK(); + sx_xunlock(&kld_sx); td->td_retval[0] = 0; return (0); @@ -1317,7 +1286,7 @@ sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) return (error); #endif - KLD_LOCK(); + sx_xlock(&kld_sx); lf = linker_find_file_by_id(uap->fileid); if (lf) { MOD_SLOCK; @@ -1329,7 +1298,7 @@ sys_kldfirstmod(struct thread *td, struct kldfirstmod_args *uap) MOD_SUNLOCK; } else error = ENOENT; - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } @@ -1357,7 +1326,7 @@ sys_kldsym(struct thread *td, struct kldsym_args *uap) symstr = malloc(MAXPATHLEN, M_TEMP, M_WAITOK); if ((error = copyinstr(lookup.symname, symstr, MAXPATHLEN, NULL)) != 0) goto out; - KLD_LOCK(); + sx_xlock(&kld_sx); if (uap->fileid != 0) { lf = linker_find_file_by_id(uap->fileid); if (lf == NULL) @@ -1383,7 +1352,7 @@ sys_kldsym(struct thread *td, struct kldsym_args *uap) if (lf == NULL) error = ENOENT; } - KLD_UNLOCK(); + sx_xunlock(&kld_sx); out: free(symstr, M_TEMP); return (error); @@ -1492,6 +1461,7 @@ linker_preload(void *arg) error = 0; modptr = NULL; + sx_xlock(&kld_sx); while ((modptr = preload_search_next_name(modptr)) != NULL) { modname = (char *)preload_search_info(modptr, MODINFO_NAME); modtype = (char *)preload_search_info(modptr, MODINFO_TYPE); @@ -1673,6 +1643,7 @@ fail: TAILQ_REMOVE(&depended_files, lf, loaded); linker_file_unload(lf, LINKER_UNLOAD_FORCE); } + sx_xunlock(&kld_sx); /* woohoo! we made it! */ } @@ -1978,7 +1949,7 @@ linker_hwpmc_list_objects(void) int i, nmappings; nmappings = 0; - KLD_LOCK_READ(); + sx_slock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) nmappings++; @@ -1993,7 +1964,7 @@ linker_hwpmc_list_objects(void) kobase[i].pm_address = (uintptr_t)lf->address; i++; } - KLD_UNLOCK_READ(); + sx_sunlock(&kld_sx); KASSERT(i > 0, ("linker_hpwmc_list_objects: no kernel objects?")); @@ -2019,7 +1990,7 @@ linker_load_module(const char *kldname, const char *modname, char *pathname; int error; - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); if (modname == NULL) { /* * We have to load KLD @@ -2093,7 +2064,7 @@ linker_load_dependencies(linker_file_t lf) /* * All files are dependant on /kernel. */ - KLD_LOCK_ASSERT(); + sx_assert(&kld_sx, SA_XLOCKED); if (linker_kernel_file) { linker_kernel_file->refs++; error = linker_file_add_dependency(lf, linker_kernel_file); @@ -2185,16 +2156,16 @@ sysctl_kern_function_list(SYSCTL_HANDLER_ARGS) error = sysctl_wire_old_buffer(req, 0); if (error != 0) return (error); - KLD_LOCK(); + sx_xlock(&kld_sx); TAILQ_FOREACH(lf, &linker_files, link) { error = LINKER_EACH_FUNCTION_NAME(lf, sysctl_kern_function_list_iterate, req); if (error) { - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (error); } } - KLD_UNLOCK(); + sx_xunlock(&kld_sx); return (SYSCTL_OUT(req, "", 1)); } diff --git a/freebsd/sys/kern/kern_mbuf.c b/freebsd/sys/kern/kern_mbuf.c index 98cfb1f0..74e7aa10 100644 --- a/freebsd/sys/kern/kern_mbuf.c +++ b/freebsd/sys/kern/kern_mbuf.c @@ -2,7 +2,7 @@ /*- * Copyright (c) 2004, 2005, - * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. + * Bosko Milekic <bmilekic@FreeBSD.org>. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -49,9 +49,13 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #include <vm/vm_kern.h> #include <vm/vm_page.h> +#include <vm/vm_map.h> #include <vm/uma.h> #include <vm/uma_int.h> #include <vm/uma_dbg.h> +#ifdef __rtems__ +#include <rtems/bsd/bsd.h> +#endif /* __rtems__ */ /* * In FreeBSD, Mbufs and Mbuf Clusters are allocated from UMA @@ -78,7 +82,7 @@ __FBSDID("$FreeBSD$"); * [ Cluster Zone ] [ Zone ] [ Mbuf Master Zone ] * | \________ | * [ Cluster Keg ] \ / - * | [ Mbuf Keg ] + * | [ Mbuf Keg ] * [ Cluster Slabs ] | * | [ Mbuf Slabs ] * \____________(VM)_________________/ @@ -98,39 +102,69 @@ __FBSDID("$FreeBSD$"); * */ +int nmbufs; /* limits number of mbufs */ int nmbclusters; /* limits number of mbuf clusters */ int nmbjumbop; /* limits number of page size jumbo clusters */ int nmbjumbo9; /* limits number of 9k jumbo clusters */ int nmbjumbo16; /* limits number of 16k jumbo clusters */ struct mbstat mbstat; +static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ + +SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, &maxmbufmem, 0, + "Maximum real memory allocateable to various mbuf types"); + /* - * tunable_mbinit() has to be run before init_maxsockets() thus - * the SYSINIT order below is SI_ORDER_MIDDLE while init_maxsockets() - * runs at SI_ORDER_ANY. + * tunable_mbinit() has to be run before any mbuf allocations are done. */ static void tunable_mbinit(void *dummy) { +#ifndef __rtems__ + quad_t realmem; + + /* + * The default limit for all mbuf related memory is 1/2 of all + * available kernel memory (physical or kmem). + * At most it can be 3/4 of available kernel memory. + */ + realmem = qmin((quad_t)physmem * PAGE_SIZE, + vm_map_max(kmem_map) - vm_map_min(kmem_map)); + maxmbufmem = realmem / 2; + TUNABLE_QUAD_FETCH("kern.ipc.maxmbufmem", &maxmbufmem); + if (maxmbufmem > realmem / 4 * 3) + maxmbufmem = realmem / 4 * 3; +#else /* __rtems__ */ + maxmbufmem = rtems_bsd_get_allocator_domain_size( + RTEMS_BSD_ALLOCATOR_DOMAIN_MBUF); +#endif /* __rtems__ */ - /* This has to be done before VM init. */ TUNABLE_INT_FETCH("kern.ipc.nmbclusters", &nmbclusters); if (nmbclusters == 0) - nmbclusters = 1024 + maxusers * 64; + nmbclusters = maxmbufmem / MCLBYTES / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbop", &nmbjumbop); if (nmbjumbop == 0) - nmbjumbop = nmbclusters / 2; + nmbjumbop = maxmbufmem / MJUMPAGESIZE / 4; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo9", &nmbjumbo9); if (nmbjumbo9 == 0) - nmbjumbo9 = nmbclusters / 4; + nmbjumbo9 = maxmbufmem / MJUM9BYTES / 6; TUNABLE_INT_FETCH("kern.ipc.nmbjumbo16", &nmbjumbo16); if (nmbjumbo16 == 0) - nmbjumbo16 = nmbclusters / 8; + nmbjumbo16 = maxmbufmem / MJUM16BYTES / 6; + + /* + * We need at least as many mbufs as we have clusters of + * the various types added together. + */ + TUNABLE_INT_FETCH("kern.ipc.nmbufs", &nmbufs); + if (nmbufs < nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) + nmbufs = lmax(maxmbufmem / MSIZE / 5, + nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16); } -SYSINIT(tunable_mbinit, SI_SUB_TUNABLES, SI_ORDER_MIDDLE, tunable_mbinit, NULL); +SYSINIT(tunable_mbinit, SI_SUB_KMEM, SI_ORDER_MIDDLE, tunable_mbinit, NULL); static int sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) @@ -138,11 +172,12 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) int error, newnmbclusters; newnmbclusters = nmbclusters; - error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); - if (error == 0 && req->newptr) { - if (newnmbclusters > nmbclusters) { + error = sysctl_handle_int(oidp, &newnmbclusters, 0, req); + if (error == 0 && req->newptr && newnmbclusters != nmbclusters) { + if (newnmbclusters > nmbclusters && + nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbclusters = newnmbclusters; - uma_zone_set_max(zone_clust, nmbclusters); + nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); #ifndef __rtems__ EVENTHANDLER_INVOKE(nmbclusters_change); #endif /* __rtems__ */ @@ -161,11 +196,12 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) int error, newnmbjumbop; newnmbjumbop = nmbjumbop; - error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); - if (error == 0 && req->newptr) { - if (newnmbjumbop> nmbjumbop) { + error = sysctl_handle_int(oidp, &newnmbjumbop, 0, req); + if (error == 0 && req->newptr && newnmbjumbop != nmbjumbop) { + if (newnmbjumbop > nmbjumbop && + nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbop = newnmbjumbop; - uma_zone_set_max(zone_jumbop, nmbjumbop); + nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); } else error = EINVAL; } @@ -173,8 +209,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbop, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbop, 0, sysctl_nmbjumbop, "IU", - "Maximum number of mbuf page size jumbo clusters allowed"); - + "Maximum number of mbuf page size jumbo clusters allowed"); static int sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) @@ -182,11 +217,12 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) int error, newnmbjumbo9; newnmbjumbo9 = nmbjumbo9; - error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); - if (error == 0 && req->newptr) { - if (newnmbjumbo9> nmbjumbo9) { + error = sysctl_handle_int(oidp, &newnmbjumbo9, 0, req); + if (error == 0 && req->newptr && newnmbjumbo9 != nmbjumbo9) { + if (newnmbjumbo9 > nmbjumbo9 && + nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; - uma_zone_set_max(zone_jumbo9, nmbjumbo9); + nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); } else error = EINVAL; } @@ -194,7 +230,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) } SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo9, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbo9, 0, sysctl_nmbjumbo9, "IU", - "Maximum number of mbuf 9k jumbo clusters allowed"); + "Maximum number of mbuf 9k jumbo clusters allowed"); static int sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) @@ -202,11 +238,12 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) int error, newnmbjumbo16; newnmbjumbo16 = nmbjumbo16; - error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); - if (error == 0 && req->newptr) { - if (newnmbjumbo16> nmbjumbo16) { + error = sysctl_handle_int(oidp, &newnmbjumbo16, 0, req); + if (error == 0 && req->newptr && newnmbjumbo16 != nmbjumbo16) { + if (newnmbjumbo16 > nmbjumbo16 && + nmbufs >= nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo16 = newnmbjumbo16; - uma_zone_set_max(zone_jumbo16, nmbjumbo16); + nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); } else error = EINVAL; } @@ -216,7 +253,26 @@ SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbjumbo16, CTLTYPE_INT|CTLFLAG_RW, &nmbjumbo16, 0, sysctl_nmbjumbo16, "IU", "Maximum number of mbuf 16k jumbo clusters allowed"); - +static int +sysctl_nmbufs(SYSCTL_HANDLER_ARGS) +{ + int error, newnmbufs; + + newnmbufs = nmbufs; + error = sysctl_handle_int(oidp, &newnmbufs, 0, req); + if (error == 0 && req->newptr && newnmbufs != nmbufs) { + if (newnmbufs > nmbufs) { + nmbufs = newnmbufs; + nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); + EVENTHANDLER_INVOKE(nmbufs_change); + } else + error = EINVAL; + } + return (error); +} +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, +&nmbufs, 0, sysctl_nmbufs, "IU", + "Maximum number of mbufs allowed"); SYSCTL_STRUCT(_kern_ipc, OID_AUTO, mbstat, CTLFLAG_RD, &mbstat, mbstat, "Mbuf general information and statistics"); @@ -245,7 +301,6 @@ static int mb_zinit_pack(void *, int, int); static void mb_zfini_pack(void *, int); static void mb_reclaim(void *); -static void mbuf_init(void *); static void *mbuf_jumbo_alloc(uma_zone_t, int, uint8_t *, int); /* Ensure that MSIZE doesn't break dtom() - it must be a power of 2 */ @@ -254,7 +309,6 @@ CTASSERT((((MSIZE - 1) ^ MSIZE) + 1) >> 1 == MSIZE); /* * Initialize FreeBSD Network buffer allocation. */ -SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); static void mbuf_init(void *dummy) { @@ -270,6 +324,8 @@ mbuf_init(void *dummy) NULL, NULL, #endif MSIZE - 1, UMA_ZONE_MAXBUCKET); + if (nmbufs > 0) + nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); zone_clust = uma_zcreate(MBUF_CLUSTER_MEM_NAME, MCLBYTES, mb_ctor_clust, mb_dtor_clust, @@ -280,7 +336,7 @@ mbuf_init(void *dummy) #endif UMA_ALIGN_PTR, UMA_ZONE_REFCNT); if (nmbclusters > 0) - uma_zone_set_max(zone_clust, nmbclusters); + nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); zone_pack = uma_zsecond_create(MBUF_PACKET_MEM_NAME, mb_ctor_pack, mb_dtor_pack, mb_zinit_pack, mb_zfini_pack, zone_mbuf); @@ -295,7 +351,7 @@ mbuf_init(void *dummy) #endif UMA_ALIGN_PTR, UMA_ZONE_REFCNT); if (nmbjumbop > 0) - uma_zone_set_max(zone_jumbop, nmbjumbop); + nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); zone_jumbo9 = uma_zcreate(MBUF_JUMBO9_MEM_NAME, MJUM9BYTES, mb_ctor_clust, mb_dtor_clust, @@ -305,9 +361,9 @@ mbuf_init(void *dummy) NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_REFCNT); - if (nmbjumbo9 > 0) - uma_zone_set_max(zone_jumbo9, nmbjumbo9); uma_zone_set_allocf(zone_jumbo9, mbuf_jumbo_alloc); + if (nmbjumbo9 > 0) + nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); zone_jumbo16 = uma_zcreate(MBUF_JUMBO16_MEM_NAME, MJUM16BYTES, mb_ctor_clust, mb_dtor_clust, @@ -317,9 +373,9 @@ mbuf_init(void *dummy) NULL, NULL, #endif UMA_ALIGN_PTR, UMA_ZONE_REFCNT); - if (nmbjumbo16 > 0) - uma_zone_set_max(zone_jumbo16, nmbjumbo16); uma_zone_set_allocf(zone_jumbo16, mbuf_jumbo_alloc); + if (nmbjumbo16 > 0) + nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); zone_ext_refcnt = uma_zcreate(MBUF_EXTREFCNT_MEM_NAME, sizeof(u_int), NULL, NULL, @@ -357,6 +413,7 @@ mbuf_init(void *dummy) mbstat.sf_iocnt = 0; mbstat.sf_allocwait = mbstat.sf_allocfail = 0; } +SYSINIT(mbuf, SI_SUB_MBUF, SI_ORDER_FIRST, mbuf_init, NULL); /* * UMA backend page allocator for the jumbo frame zones. @@ -445,7 +502,7 @@ static void mb_dtor_mbuf(void *mem, int size, void *arg) { struct mbuf *m; - unsigned long flags; + unsigned long flags; m = (struct mbuf *)mem; flags = (unsigned long)arg; diff --git a/freebsd/sys/kern/kern_mib.c b/freebsd/sys/kern/kern_mib.c index a8215b39..c3605582 100644 --- a/freebsd/sys/kern/kern_mib.c +++ b/freebsd/sys/kern/kern_mib.c @@ -271,6 +271,13 @@ SYSCTL_PROC(_hw, HW_MACHINE_ARCH, machine_arch, CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_hw_machine_arch, "A", "System architecture"); #endif /* __rtems__ */ +SYSCTL_STRING(_kern, OID_AUTO, supported_archs, CTLFLAG_RD | CTLFLAG_MPSAFE, +#ifdef COMPAT_FREEBSD32 + MACHINE_ARCH " " MACHINE_ARCH32, 0, "Supported architectures for binaries"); +#else + MACHINE_ARCH, 0, "Supported architectures for binaries"); +#endif + static int sysctl_hostname(SYSCTL_HANDLER_ARGS) { diff --git a/freebsd/sys/kern/kern_time.c b/freebsd/sys/kern/kern_time.c index e113aef6..dbb10d01 100644 --- a/freebsd/sys/kern/kern_time.c +++ b/freebsd/sys/kern/kern_time.c @@ -60,6 +60,12 @@ __FBSDID("$FreeBSD$"); #include <vm/vm_extern.h> #define MAX_CLOCKS (CLOCK_MONOTONIC+1) +#define CPUCLOCK_BIT 0x80000000 +#define CPUCLOCK_PROCESS_BIT 0x40000000 +#define CPUCLOCK_ID_MASK (~(CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT)) +#define MAKE_THREAD_CPUCLOCK(tid) (CPUCLOCK_BIT|(tid)) +#define MAKE_PROCESS_CPUCLOCK(pid) \ + (CPUCLOCK_BIT|CPUCLOCK_PROCESS_BIT|(pid)) #ifndef __rtems__ static struct kclock posix_clocks[MAX_CLOCKS]; @@ -96,9 +102,6 @@ static int realtimer_settime(struct itimer *, int, static int realtimer_delete(struct itimer *); static void realtimer_clocktime(clockid_t, struct timespec *); static void realtimer_expire(void *); -static int kern_timer_create(struct thread *, clockid_t, - struct sigevent *, int *, int); -static int kern_timer_delete(struct thread *, int); int register_posix_clock(int, struct kclock *); void itimer_fire(struct itimer *it); @@ -170,6 +173,60 @@ settime(struct thread *td, struct timeval *tv) } #ifndef _SYS_SYSPROTO_H_ +struct clock_getcpuclockid2_args { + id_t id; + int which, + clockid_t *clock_id; +}; +#endif +/* ARGSUSED */ +int +sys_clock_getcpuclockid2(struct thread *td, struct clock_getcpuclockid2_args *uap) +{ + clockid_t clk_id; + int error; + + error = kern_clock_getcpuclockid2(td, uap->id, uap->which, &clk_id); + if (error == 0) + error = copyout(&clk_id, uap->clock_id, sizeof(clockid_t)); + return (error); +} + +int +kern_clock_getcpuclockid2(struct thread *td, id_t id, int which, + clockid_t *clk_id) +{ + struct proc *p; + pid_t pid; + lwpid_t tid; + int error; + + switch (which) { + case CPUCLOCK_WHICH_PID: + if (id != 0) { + p = pfind(id); + if (p == NULL) + return (ESRCH); + error = p_cansee(td, p); + PROC_UNLOCK(p); + if (error != 0) + return (error); + pid = id; + } else { + pid = td->td_proc->p_pid; + } + *clk_id = MAKE_PROCESS_CPUCLOCK(pid); + return (0); + case CPUCLOCK_WHICH_TID: + tid = id == 0 ? td->td_tid : id; + *clk_id = MAKE_THREAD_CPUCLOCK(tid); + return (0); + default: + return (EINVAL); + } +} + +#ifndef _SYS_SYSPROTO_H_ struct clock_gettime_args { clockid_t clock_id; struct timespec *tp; @@ -192,12 +249,80 @@ sys_clock_gettime(struct thread *td, struct clock_gettime_args *uap) #endif #ifndef __rtems__ +static inline void +cputick2timespec(uint64_t runtime, struct timespec *ats) +{ + runtime = cputick2usec(runtime); + ats->tv_sec = runtime / 1000000; + ats->tv_nsec = runtime % 1000000 * 1000; +} + +static void +get_thread_cputime(struct thread *targettd, struct timespec *ats) +{ + uint64_t runtime, curtime, switchtime; + + if (targettd == NULL) { /* current thread */ + critical_enter(); + switchtime = PCPU_GET(switchtime); + curtime = cpu_ticks(); + runtime = curthread->td_runtime; + critical_exit(); + runtime += curtime - switchtime; + } else { + thread_lock(targettd); + runtime = targettd->td_runtime; + thread_unlock(targettd); + } + cputick2timespec(runtime, ats); +} + +static void +get_process_cputime(struct proc *targetp, struct timespec *ats) +{ + uint64_t runtime; + struct rusage ru; + + PROC_SLOCK(targetp); + rufetch(targetp, &ru); + runtime = targetp->p_rux.rux_runtime; + PROC_SUNLOCK(targetp); + cputick2timespec(runtime, ats); +} + +static int +get_cputime(struct thread *td, clockid_t clock_id, struct timespec *ats) +{ + struct proc *p, *p2; + struct thread *td2; + lwpid_t tid; + pid_t pid; + int error; + + p = td->td_proc; + if ((clock_id & CPUCLOCK_PROCESS_BIT) == 0) { + tid = clock_id & CPUCLOCK_ID_MASK; + td2 = tdfind(tid, p->p_pid); + if (td2 == NULL) + return (EINVAL); + get_thread_cputime(td2, ats); + PROC_UNLOCK(td2->td_proc); + } else { + pid = clock_id & CPUCLOCK_ID_MASK; + error = pget(pid, PGET_CANSEE, &p2); + if (error != 0) + return (EINVAL); + get_process_cputime(p2, ats); + PROC_UNLOCK(p2); + } + return (0); +} + int kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats) { struct timeval sys, user; struct proc *p; - uint64_t runtime, curtime, switchtime; p = td->td_proc; switch (clock_id) { @@ -240,17 +365,17 @@ kern_clock_gettime(struct thread *td, clockid_t clock_id, struct timespec *ats) ats->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: - critical_enter(); - switchtime = PCPU_GET(switchtime); - curtime = cpu_ticks(); - runtime = td->td_runtime; - critical_exit(); - runtime = cputick2usec(runtime + curtime - switchtime); - ats->tv_sec = runtime / 1000000; - ats->tv_nsec = runtime % 1000000 * 1000; + get_thread_cputime(NULL, ats); + break; + case CLOCK_PROCESS_CPUTIME_ID: + PROC_LOCK(p); + get_process_cputime(p, ats); + PROC_UNLOCK(p); break; default: - return (EINVAL); + if ((int)clock_id >= 0) + return (EINVAL); + return (get_cputime(td, clock_id, ats)); } return (0); } @@ -348,12 +473,16 @@ kern_clock_getres(struct thread *td, clockid_t clock_id, struct timespec *ts) ts->tv_nsec = 0; break; case CLOCK_THREAD_CPUTIME_ID: + case CLOCK_PROCESS_CPUTIME_ID: + cputime: /* sync with cputick2usec */ ts->tv_nsec = 1000000 / cpu_tickrate(); if (ts->tv_nsec == 0) ts->tv_nsec = 1000; break; default: + if ((int)clock_id < 0) + goto cputime; return (EINVAL); } return (0); @@ -939,31 +1068,30 @@ struct ktimer_create_args { int sys_ktimer_create(struct thread *td, struct ktimer_create_args *uap) { - struct sigevent *evp1, ev; + struct sigevent *evp, ev; int id; int error; - if (uap->evp != NULL) { + if (uap->evp == NULL) { + evp = NULL; + } else { error = copyin(uap->evp, &ev, sizeof(ev)); if (error != 0) return (error); - evp1 = &ev; - } else - evp1 = NULL; - - error = kern_timer_create(td, uap->clock_id, evp1, &id, -1); - + evp = &ev; + } + error = kern_ktimer_create(td, uap->clock_id, evp, &id, -1); if (error == 0) { error = copyout(&id, uap->timerid, sizeof(int)); if (error != 0) - kern_timer_delete(td, id); + kern_ktimer_delete(td, id); } return (error); } -static int -kern_timer_create(struct thread *td, clockid_t clock_id, - struct sigevent *evp, int *timerid, int preset_id) +int +kern_ktimer_create(struct thread *td, clockid_t clock_id, struct sigevent *evp, + int *timerid, int preset_id) { struct proc *p = td->td_proc; struct itimer *it; @@ -1078,7 +1206,8 @@ struct ktimer_delete_args { int sys_ktimer_delete(struct thread *td, struct ktimer_delete_args *uap) { - return (kern_timer_delete(td, uap->timerid)); + + return (kern_ktimer_delete(td, uap->timerid)); } static struct itimer * @@ -1100,8 +1229,8 @@ itimer_find(struct proc *p, int timerid) return (it); } -static int -kern_timer_delete(struct thread *td, int timerid) +int +kern_ktimer_delete(struct thread *td, int timerid) { struct proc *p = td->td_proc; struct itimer *it; @@ -1143,35 +1272,40 @@ struct ktimer_settime_args { int sys_ktimer_settime(struct thread *td, struct ktimer_settime_args *uap) { - struct proc *p = td->td_proc; - struct itimer *it; struct itimerspec val, oval, *ovalp; int error; error = copyin(uap->value, &val, sizeof(val)); if (error != 0) return (error); - - if (uap->ovalue != NULL) - ovalp = &oval; - else - ovalp = NULL; + ovalp = uap->ovalue != NULL ? &oval : NULL; + error = kern_ktimer_settime(td, uap->timerid, uap->flags, &val, ovalp); + if (error == 0 && uap->ovalue != NULL) + error = copyout(ovalp, uap->ovalue, sizeof(*ovalp)); + return (error); +} +int +kern_ktimer_settime(struct thread *td, int timer_id, int flags, + struct itimerspec *val, struct itimerspec *oval) +{ + struct proc *p; + struct itimer *it; + int error; + + p = td->td_proc; PROC_LOCK(p); - if (uap->timerid < 3 || - (it = itimer_find(p, uap->timerid)) == NULL) { + if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); - error = CLOCK_CALL(it->it_clockid, timer_settime, - (it, uap->flags, &val, ovalp)); + error = CLOCK_CALL(it->it_clockid, timer_settime, (it, + flags, val, oval)); itimer_leave(it); ITIMER_UNLOCK(it); } - if (error == 0 && uap->ovalue != NULL) - error = copyout(ovalp, uap->ovalue, sizeof(*ovalp)); return (error); } @@ -1184,26 +1318,34 @@ struct ktimer_gettime_args { int sys_ktimer_gettime(struct thread *td, struct ktimer_gettime_args *uap) { - struct proc *p = td->td_proc; - struct itimer *it; struct itimerspec val; int error; + error = kern_ktimer_gettime(td, uap->timerid, &val); + if (error == 0) + error = copyout(&val, uap->value, sizeof(val)); + return (error); +} + +int +kern_ktimer_gettime(struct thread *td, int timer_id, struct itimerspec *val) +{ + struct proc *p; + struct itimer *it; + int error; + + p = td->td_proc; PROC_LOCK(p); - if (uap->timerid < 3 || - (it = itimer_find(p, uap->timerid)) == NULL) { + if (timer_id < 3 || (it = itimer_find(p, timer_id)) == NULL) { PROC_UNLOCK(p); error = EINVAL; } else { PROC_UNLOCK(p); itimer_enter(it); - error = CLOCK_CALL(it->it_clockid, timer_gettime, - (it, &val)); + error = CLOCK_CALL(it->it_clockid, timer_gettime, (it, val)); itimer_leave(it); ITIMER_UNLOCK(it); } - if (error == 0) - error = copyout(&val, uap->value, sizeof(val)); return (error); } @@ -1498,7 +1640,7 @@ itimers_event_hook_exit(void *arg, struct proc *p) panic("unhandled event"); for (; i < TIMER_MAX; ++i) { if ((it = its->its_timers[i]) != NULL) - kern_timer_delete(curthread, i); + kern_ktimer_delete(curthread, i); } if (its->its_timers[0] == NULL && its->its_timers[1] == NULL && diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 1ca98d9e..821b035d 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -66,11 +66,9 @@ __FBSDID("$FreeBSD$"); #define ncallout 16 #endif /* __rtems__ */ SDT_PROVIDER_DEFINE(callout_execute); -SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start); -SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0, +SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, "struct callout *"); -SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end); -SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0, +SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, "struct callout *"); static int avg_depth; @@ -251,7 +249,7 @@ rtems_bsd_timeout_init_late(void *unused) SYSINIT(rtems_bsd_timeout_early, SI_SUB_VM, SI_ORDER_FIRST, rtems_bsd_timeout_init_early, NULL); -SYSINIT(rtems_bsd_timeout_late, SI_SUB_RUN_SCHEDULER, SI_ORDER_FIRST, +SYSINIT(rtems_bsd_timeout_late, SI_SUB_LAST, SI_ORDER_FIRST, rtems_bsd_timeout_init_late, NULL); static void @@ -580,11 +578,11 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, #endif #ifndef __rtems__ THREAD_NO_SLEEPING(); - SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0); + SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); #endif /* __rtems__ */ c_func(c_arg); #ifndef __rtems__ - SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0); + SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); THREAD_SLEEPING_OK(); #endif /* __rtems__ */ #ifdef DIAGNOSTIC @@ -943,11 +941,13 @@ _callout_stop_safe(c, safe) struct callout *c; int safe; { +#ifndef __rtems__ struct callout_cpu *cc, *old_cc; struct lock_class *class; -#ifndef __rtems__ int use_lock, sq_locked; #else /* __rtems__ */ + struct callout_cpu *cc; + struct lock_class *class; int use_lock; #endif /* __rtems__ */ diff --git a/freebsd/sys/kern/subr_lock.c b/freebsd/sys/kern/subr_lock.c index dea7d408..4a55a95a 100644 --- a/freebsd/sys/kern/subr_lock.c +++ b/freebsd/sys/kern/subr_lock.c @@ -12,9 +12,6 @@ * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. - * 3. Neither the name of the author nor the names of any co-contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE @@ -69,6 +66,7 @@ struct lock_class *lock_classes[LOCK_CLASS_MAX + 1] = { &lock_class_sx, #ifndef __rtems__ &lock_class_rm, + &lock_class_rm_sleepable, #endif /* __rtems__ */ &lock_class_rw, #ifndef __rtems__ diff --git a/freebsd/sys/kern/subr_rman.c b/freebsd/sys/kern/subr_rman.c index 668201a9..c158b36e 100644 --- a/freebsd/sys/kern/subr_rman.c +++ b/freebsd/sys/kern/subr_rman.c @@ -453,7 +453,7 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end, mtx_lock(rm->rm_mtx); for (r = TAILQ_FIRST(&rm->rm_list); - r && r->r_end < start; + r && r->r_end < start + count - 1; r = TAILQ_NEXT(r, r_link)) ; @@ -463,6 +463,11 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end, } amask = (1ul << RF_ALIGNMENT(flags)) - 1; + if (start > ULONG_MAX - amask) { + DPRINTF(("start+amask would wrap around\n")); + goto out; + } + /* If bound is 0, bmask will also be 0 */ bmask = ~(bound - 1); /* @@ -470,11 +475,20 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end, */ for (s = r; s; s = TAILQ_NEXT(s, r_link)) { DPRINTF(("considering [%#lx, %#lx]\n", s->r_start, s->r_end)); - if (s->r_start + count - 1 > end) { + /* + * The resource list is sorted, so there is no point in + * searching further once r_start is too large. + */ + if (s->r_start > end - (count - 1)) { DPRINTF(("s->r_start (%#lx) + count - 1> end (%#lx)\n", s->r_start, end)); break; } + if (s->r_start > ULONG_MAX - amask) { + DPRINTF(("s->r_start (%#lx) + amask (%#lx) too large\n", + s->r_start, amask)); + break; + } if (s->r_flags & RF_ALLOCATED) { DPRINTF(("region is allocated\n")); continue; @@ -585,15 +599,10 @@ rman_reserve_resource_bound(struct rman *rm, u_long start, u_long end, if ((flags & (RF_SHAREABLE | RF_TIMESHARE)) == 0) goto out; - for (s = r; s; s = TAILQ_NEXT(s, r_link)) { - if (s->r_start > end) - break; - if ((s->r_flags & flags) != flags) - continue; - rstart = ulmax(s->r_start, start); - rend = ulmin(s->r_end, ulmax(start + count - 1, end)); - if (s->r_start >= start && s->r_end <= end - && (s->r_end - s->r_start + 1) == count && + for (s = r; s && s->r_end <= end; s = TAILQ_NEXT(s, r_link)) { + if ((s->r_flags & flags) == flags && + s->r_start >= start && + (s->r_end - s->r_start + 1) == count && (s->r_start & amask) == 0 && ((s->r_start ^ s->r_end) & bmask) == 0) { rv = int_alloc_resource(M_NOWAIT); diff --git a/freebsd/sys/kern/subr_sbuf.c b/freebsd/sys/kern/subr_sbuf.c index 9ea11990..e61b0844 100644 --- a/freebsd/sys/kern/subr_sbuf.c +++ b/freebsd/sys/kern/subr_sbuf.c @@ -708,9 +708,10 @@ sbuf_finish(struct sbuf *s) #ifdef _KERNEL return (s->s_error); #else - errno = s->s_error; - if (s->s_error) + if (s->s_error != 0) { + errno = s->s_error; return (-1); + } return (0); #endif } diff --git a/freebsd/sys/kern/subr_taskqueue.c b/freebsd/sys/kern/subr_taskqueue.c index 867b0e6b..259b152d 100644 --- a/freebsd/sys/kern/subr_taskqueue.c +++ b/freebsd/sys/kern/subr_taskqueue.c @@ -291,6 +291,15 @@ taskqueue_enqueue_timeout(struct taskqueue *queue, return (res); } +static void +taskqueue_drain_running(struct taskqueue *queue) +{ + + while (!TAILQ_EMPTY(&queue->tq_active)) + TQ_SLEEP(queue, &queue->tq_active, &queue->tq_mutex, + PWAIT, "-", 0); +} + void taskqueue_block(struct taskqueue *queue) { @@ -343,6 +352,8 @@ taskqueue_run_locked(struct taskqueue *queue) wakeup(task); } TAILQ_REMOVE(&queue->tq_active, &tb, tb_link); + if (TAILQ_EMPTY(&queue->tq_active)) + wakeup(&queue->tq_active); } void @@ -383,11 +394,9 @@ taskqueue_cancel_locked(struct taskqueue *queue, struct task *task, int taskqueue_cancel(struct taskqueue *queue, struct task *task, u_int *pendp) { - u_int pending; int error; TQ_LOCK(queue); - pending = task->ta_pending; error = taskqueue_cancel_locked(queue, task, pendp); TQ_UNLOCK(queue); @@ -431,6 +440,27 @@ taskqueue_drain(struct taskqueue *queue, struct task *task) } void +taskqueue_drain_all(struct taskqueue *queue) +{ + struct task *task; + +#ifndef __rtems__ + if (!queue->tq_spin) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, __func__); +#endif /* __rtems__ */ + + TQ_LOCK(queue); + task = STAILQ_LAST(&queue->tq_queue, task, ta_link); + if (task != NULL) + while (task->ta_pending != 0) + TQ_SLEEP(queue, task, &queue->tq_mutex, PWAIT, "-", 0); + taskqueue_drain_running(queue); + KASSERT(STAILQ_EMPTY(&queue->tq_queue), + ("taskqueue queue is not empty after draining")); + TQ_UNLOCK(queue); +} + +void taskqueue_drain_timeout(struct taskqueue *queue, struct timeout_task *timeout_task) { @@ -614,7 +644,6 @@ taskqueue_member(struct taskqueue *queue, struct thread *td) { int i, j, ret = 0; - TQ_LOCK(queue); for (i = 0, j = 0; ; i++) { if (queue->tq_threads[i] == NULL) continue; @@ -625,6 +654,5 @@ taskqueue_member(struct taskqueue *queue, struct thread *td) if (++j >= queue->tq_tcount) break; } - TQ_UNLOCK(queue); return (ret); } diff --git a/freebsd/sys/kern/sys_generic.c b/freebsd/sys/kern/sys_generic.c index 6fc16fc5..eb1ed37d 100644 --- a/freebsd/sys/kern/sys_generic.c +++ b/freebsd/sys/kern/sys_generic.c @@ -83,6 +83,10 @@ __FBSDID("$FreeBSD$"); int iosize_max_clamp = 1; SYSCTL_INT(_debug, OID_AUTO, iosize_max_clamp, CTLFLAG_RW, &iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX"); +int devfs_iosize_max_clamp = 1; +SYSCTL_INT(_debug, OID_AUTO, devfs_iosize_max_clamp, CTLFLAG_RW, + &devfs_iosize_max_clamp, 0, "Clamp max i/o size to INT_MAX for devices"); + /* * Assert that the return value of read(2) and write(2) syscalls fits * into a register. If not, an architecture will need to provide the diff --git a/freebsd/sys/kern/uipc_sockbuf.c b/freebsd/sys/kern/uipc_sockbuf.c index 2a0e527d..03b18b92 100644 --- a/freebsd/sys/kern/uipc_sockbuf.c +++ b/freebsd/sys/kern/uipc_sockbuf.c @@ -623,29 +623,12 @@ sbappendrecord(struct sockbuf *sb, struct mbuf *m0) SOCKBUF_UNLOCK(sb); } -/* - * Append address and data, and optionally, control (ancillary) data to the - * receive queue of a socket. If present, m0 must include a packet header - * with total length. Returns 0 if no space in sockbuf or insufficient - * mbufs. - */ -int -sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, - struct mbuf *m0, struct mbuf *control) +/* Helper routine that appends data, control, and address to a sockbuf. */ +static int +sbappendaddr_locked_internal(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control, struct mbuf *ctrl_last) { struct mbuf *m, *n, *nlast; - int space = asa->sa_len; - - SOCKBUF_LOCK_ASSERT(sb); - - if (m0 && (m0->m_flags & M_PKTHDR) == 0) - panic("sbappendaddr_locked"); - if (m0) - space += m0->m_pkthdr.len; - space += m_length(control, &n); - - if (space > sbspace(sb)) - return (0); #if MSIZE <= 256 if (asa->sa_len > MLEN) return (0); @@ -655,8 +638,8 @@ sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, return (0); m->m_len = asa->sa_len; bcopy(asa, mtod(m, caddr_t), asa->sa_len); - if (n) - n->m_next = m0; /* concatenate data to control */ + if (ctrl_last) + ctrl_last->m_next = m0; /* concatenate data to control */ else control = m0; m->m_next = control; @@ -680,6 +663,50 @@ sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, * mbufs. */ int +sbappendaddr_locked(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control) +{ + struct mbuf *ctrl_last; + int space = asa->sa_len; + + SOCKBUF_LOCK_ASSERT(sb); + + if (m0 && (m0->m_flags & M_PKTHDR) == 0) + panic("sbappendaddr_locked"); + if (m0) + space += m0->m_pkthdr.len; + space += m_length(control, &ctrl_last); + + if (space > sbspace(sb)) + return (0); + return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); +} + +/* + * Append address and data, and optionally, control (ancillary) data to the + * receive queue of a socket. If present, m0 must include a packet header + * with total length. Returns 0 if insufficient mbufs. Does not validate space + * on the receiving sockbuf. + */ +int +sbappendaddr_nospacecheck_locked(struct sockbuf *sb, const struct sockaddr *asa, + struct mbuf *m0, struct mbuf *control) +{ + struct mbuf *ctrl_last; + + SOCKBUF_LOCK_ASSERT(sb); + + ctrl_last = (control == NULL) ? NULL : m_last(control); + return (sbappendaddr_locked_internal(sb, asa, m0, control, ctrl_last)); +} + +/* + * Append address and data, and optionally, control (ancillary) data to the + * receive queue of a socket. If present, m0 must include a packet header + * with total length. Returns 0 if no space in sockbuf or insufficient + * mbufs. + */ +int sbappendaddr(struct sockbuf *sb, const struct sockaddr *asa, struct mbuf *m0, struct mbuf *control) { @@ -1024,6 +1051,11 @@ sbcreatecontrol(caddr_t p, int size, int type, int level) m->m_len = 0; KASSERT(CMSG_SPACE((u_int)size) <= M_TRAILINGSPACE(m), ("sbcreatecontrol: short mbuf")); + /* + * Don't leave the padding between the msg header and the + * cmsg data and the padding after the cmsg data un-initialized. + */ + bzero(cp, CMSG_SPACE((u_int)size)); if (p != NULL) (void)memcpy(CMSG_DATA(cp), p, size); m->m_len = CMSG_SPACE(size); diff --git a/freebsd/sys/kern/uipc_socket.c b/freebsd/sys/kern/uipc_socket.c index 9ca2c14c..2dc76a9f 100644 --- a/freebsd/sys/kern/uipc_socket.c +++ b/freebsd/sys/kern/uipc_socket.c @@ -151,6 +151,10 @@ __FBSDID("$FreeBSD$"); #include <sys/sysent.h> #include <compat/freebsd32/freebsd32.h> #endif +#ifdef __rtems__ +#include <rtems/libio.h> +#define maxfiles rtems_libio_number_iops +#endif /* __rtems__ */ static int soreceive_rcvoob(struct socket *so, struct uio *uio, int flags); @@ -256,12 +260,14 @@ SYSCTL_NODE(_kern, KERN_IPC, ipc, CTLFLAG_RW, 0, "IPC"); uma_zone_t socket_zone; int maxsockets; +#ifndef __rtems__ static void socket_zone_change(void *tag) { uma_zone_set_max(socket_zone, maxsockets); } +#endif /* __rtems__ */ static void socket_init(void *tag) @@ -270,8 +276,10 @@ socket_init(void *tag) socket_zone = uma_zcreate("socket", sizeof(struct socket), NULL, NULL, NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE); uma_zone_set_max(socket_zone, maxsockets); +#ifndef __rtems__ EVENTHANDLER_REGISTER(maxsockets_change, socket_zone_change, NULL, EVENTHANDLER_PRI_FIRST); +#endif /* __rtems__ */ } SYSINIT(socket, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, socket_init, NULL); @@ -284,11 +292,7 @@ init_maxsockets(void *ignored) { TUNABLE_INT_FETCH("kern.ipc.maxsockets", &maxsockets); -#ifndef __rtems__ - maxsockets = imax(maxsockets, imax(maxfiles, nmbclusters)); -#else /* __rtems__ */ - maxsockets = imax(maxsockets, nmbclusters); -#endif /* __rtems__ */ + maxsockets = imax(maxsockets, maxfiles); } SYSINIT(param, SI_SUB_TUNABLES, SI_ORDER_ANY, init_maxsockets, NULL); @@ -304,15 +308,12 @@ sysctl_maxsockets(SYSCTL_HANDLER_ARGS) newmaxsockets = maxsockets; error = sysctl_handle_int(oidp, &newmaxsockets, 0, req); if (error == 0 && req->newptr) { - if (newmaxsockets > maxsockets) { + if (newmaxsockets > maxsockets && + newmaxsockets <= maxfiles) { maxsockets = newmaxsockets; #ifndef __rtems__ - if (maxsockets > ((maxfiles / 4) * 3)) { - maxfiles = (maxsockets * 5) / 4; - maxfilesperproc = (maxfiles * 9) / 10; - } -#endif /* __rtems__ */ EVENTHANDLER_INVOKE(maxsockets_change); +#endif /* __rtems__ */ } else error = EINVAL; } @@ -498,6 +499,10 @@ SYSCTL_INT(_regression, OID_AUTO, sonewconn_earlytest, CTLFLAG_RW, struct socket * sonewconn(struct socket *head, int connstatus) { + static struct timeval lastover; + static struct timeval overinterval = { 60, 0 }; + static int overcount; + struct socket *so; int over; @@ -509,9 +514,17 @@ sonewconn(struct socket *head, int connstatus) #else if (over) { #endif - log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " - "%i already in queue awaiting acceptance\n", - __func__, head->so_pcb, head->so_qlen); + overcount++; + + if (ratecheck(&lastover, &overinterval)) { + log(LOG_DEBUG, "%s: pcb %p: Listen queue overflow: " + "%i already in queue awaiting acceptance " + "(%d occurrences)\n", + __func__, head->so_pcb, head->so_qlen, overcount); + + overcount = 0; + } + return (NULL); } VNET_ASSERT(head->so_vnet != NULL, ("%s:%d so_vnet is NULL, head=%p", @@ -2679,22 +2692,12 @@ sosetopt(struct socket *so, struct sockopt *sopt) sizeof tv); if (error) goto bad; - - /* assert(hz > 0); */ - if (tv.tv_sec < 0 || tv.tv_sec > INT_MAX / hz || - tv.tv_usec < 0 || tv.tv_usec >= 1000000) { + if (tv.tv_sec < 0 || tv.tv_usec < 0 || + tv.tv_usec >= 1000000) { error = EDOM; goto bad; } - /* assert(tick > 0); */ - /* assert(ULONG_MAX - INT_MAX >= 1000000); */ - val = (u_long)(tv.tv_sec * hz) + tv.tv_usec / tick; - if (val > INT_MAX) { - error = EDOM; - goto bad; - } - if (val == 0 && tv.tv_usec != 0) - val = 1; + val = tvtohz(&tv); switch (sopt->sopt_name) { case SO_SNDTIMEO: @@ -3039,8 +3042,10 @@ void sohasoutofband(struct socket *so) { +#ifndef __rtems__ if (so->so_sigio != NULL) pgsigio(&so->so_sigio, SIGURG, 0); +#endif /* __rtems__ */ selwakeuppri(&so->so_rcv.sb_sel, PSOCK); } diff --git a/freebsd/sys/kern/uipc_syscalls.c b/freebsd/sys/kern/uipc_syscalls.c index 738b5c3c..b0c83e60 100644 --- a/freebsd/sys/kern/uipc_syscalls.c +++ b/freebsd/sys/kern/uipc_syscalls.c @@ -111,6 +111,13 @@ static int kern_getsockopt( struct thread *td, int s, int level, int name, void *val, enum uio_seg valseg, socklen_t *valsize); #endif /* __rtems__ */ +/* + * Creation flags, OR'ed into socket() and socketpair() type argument. + * For stable/9, these are supported but not exposed in the header file. + */ +#define SOCK_CLOEXEC 0x10000000 +#define SOCK_NONBLOCK 0x20000000 + static int sendit(struct thread *td, int s, struct msghdr *mp, int flags); static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp); @@ -239,11 +246,26 @@ sys_socket(td, uap) #endif /* __rtems__ */ struct socket *so; struct file *fp; - int fd, error; + int fd, error, type, oflag, fflag; AUDIT_ARG_SOCKET(uap->domain, uap->type, uap->protocol); + + type = uap->type; + oflag = 0; + fflag = 0; +#ifndef __rtems__ + if ((type & SOCK_CLOEXEC) != 0) { + type &= ~SOCK_CLOEXEC; + oflag |= O_CLOEXEC; + } +#endif /* __rtems__ */ + if ((type & SOCK_NONBLOCK) != 0) { + type &= ~SOCK_NONBLOCK; + fflag |= FNONBLOCK; + } + #ifdef MAC - error = mac_socket_check_create(td->td_ucred, uap->domain, uap->type, + error = mac_socket_check_create(td->td_ucred, uap->domain, type, uap->protocol); if (error) return (error); @@ -251,16 +273,18 @@ sys_socket(td, uap) #ifndef __rtems__ fdp = td->td_proc->p_fd; #endif /* __rtems__ */ - error = falloc(td, &fp, &fd, 0); + error = falloc(td, &fp, &fd, oflag); if (error) return (error); /* An extra reference on `fp' has been held for us by falloc(). */ - error = socreate(uap->domain, &so, uap->type, uap->protocol, + error = socreate(uap->domain, &so, type, uap->protocol, td->td_ucred, td); if (error) { fdclose(fdp, fp, fd, td); } else { - finit(fp, FREAD | FWRITE, DTYPE_SOCKET, so, &socketops); + finit(fp, FREAD | FWRITE | fflag, DTYPE_SOCKET, so, &socketops); + if ((fflag & FNONBLOCK) != 0) + (void) fo_ioctl(fp, FIONBIO, &fflag, td->td_ucred, td); td->td_retval[0] = fd; } fdrop(fp, td); @@ -798,9 +822,20 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, struct filedesc *fdp = td->td_proc->p_fd; struct file *fp1, *fp2; struct socket *so1, *so2; - int fd, error; + int fd, error, oflag, fflag; AUDIT_ARG_SOCKET(domain, type, protocol); + + oflag = 0; + fflag = 0; + if ((type & SOCK_CLOEXEC) != 0) { + type &= ~SOCK_CLOEXEC; + oflag |= O_CLOEXEC; + } + if ((type & SOCK_NONBLOCK) != 0) { + type &= ~SOCK_NONBLOCK; + fflag |= FNONBLOCK; + } #ifdef MAC /* We might want to have a separate check for socket pairs. */ error = mac_socket_check_create(td->td_ucred, domain, type, @@ -815,12 +850,12 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, if (error) goto free1; /* On success extra reference to `fp1' and 'fp2' is set by falloc. */ - error = falloc(td, &fp1, &fd, 0); + error = falloc(td, &fp1, &fd, oflag); if (error) goto free2; rsv[0] = fd; fp1->f_data = so1; /* so1 already has ref count */ - error = falloc(td, &fp2, &fd, 0); + error = falloc(td, &fp2, &fd, oflag); if (error) goto free3; fp2->f_data = so2; /* so2 already has ref count */ @@ -836,8 +871,14 @@ kern_socketpair(struct thread *td, int domain, int type, int protocol, if (error) goto free4; } - finit(fp1, FREAD | FWRITE, DTYPE_SOCKET, fp1->f_data, &socketops); - finit(fp2, FREAD | FWRITE, DTYPE_SOCKET, fp2->f_data, &socketops); + finit(fp1, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp1->f_data, + &socketops); + finit(fp2, FREAD | FWRITE | fflag, DTYPE_SOCKET, fp2->f_data, + &socketops); + if ((fflag & FNONBLOCK) != 0) { + (void) fo_ioctl(fp1, FIONBIO, &fflag, td->td_ucred, td); + (void) fo_ioctl(fp2, FIONBIO, &fflag, td->td_ucred, td); + } fdrop(fp1, td); fdrop(fp2, td); return (0); diff --git a/freebsd/sys/kern/uipc_usrreq.c b/freebsd/sys/kern/uipc_usrreq.c index 9254e9b2..b7cc060d 100644 --- a/freebsd/sys/kern/uipc_usrreq.c +++ b/freebsd/sys/kern/uipc_usrreq.c @@ -339,6 +339,7 @@ static struct protosw localsw[] = { */ .pr_flags = PR_ADDR|PR_ATOMIC|PR_CONNREQUIRED|PR_WANTRCVD| PR_RIGHTS, + .pr_ctloutput = &uipc_ctloutput, .pr_usrreqs = &uipc_usrreqs_seqpacket, }, }; @@ -985,7 +986,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, from = &sun_noname; so2 = unp2->unp_socket; SOCKBUF_LOCK(&so2->so_rcv); - if (sbappendaddr_locked(&so2->so_rcv, from, m, control)) { + if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, from, m, + control)) { sorwakeup_locked(so2); m = NULL; control = NULL; @@ -1047,7 +1049,8 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, if (unp2->unp_flags & UNP_WANTCRED) { #ifndef __rtems__ /* - * Credentials are passed only once on SOCK_STREAM. + * Credentials are passed only once on SOCK_STREAM + * and SOCK_SEQPACKET. */ unp2->unp_flags &= ~UNP_WANTCRED; control = unp_addsockcred(td, control); @@ -1071,8 +1074,14 @@ uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam, const struct sockaddr *from; from = &sun_noname; - if (sbappendaddr_locked(&so2->so_rcv, from, m, - control)) + /* + * Don't check for space available in so2->so_rcv. + * Unix domain sockets only check for space in the + * sending sockbuf, and that check is performed one + * level up the stack. + */ + if (sbappendaddr_nospacecheck_locked(&so2->so_rcv, + from, m, control)) control = NULL; break; } @@ -1495,7 +1504,7 @@ unp_connect(struct socket *so, struct sockaddr *nam, struct thread *td) } /* - * The connecter's (client's) credentials are copied from its + * The connector's (client's) credentials are copied from its * process structure at the time of connect() (which is now). */ cru2x(td->td_ucred, &unp3->unp_peercred); |