Diffstat (limited to 'freebsd/sys/kern/kern_timeout.c')
-rw-r--r-- | freebsd/sys/kern/kern_timeout.c | 1336
1 file changed, 940 insertions, 396 deletions
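Before reading the diff below, it may help to see the bucket arithmetic the new implementation is built around. The stand-alone sketch that follows is an editor's orientation aid, not part of the change: it restates the hashing done by callout_hash()/callout_get_bucket() using the constants visible in the patch (CC_HASH_SHIFT is 8; callwheelsize is the power of two above ncallout). The ncallout value and the example timestamp are hypothetical.

/*
 * Orientation sketch (not part of the patch): how an absolute sbintime_t
 * expiry maps to a callwheel bucket.  sbintime_t is 32.32 fixed-point
 * seconds, so shifting by (32 - CC_HASH_SHIFT) makes adjacent buckets
 * 2^-8 s (about 3.9 ms) apart, matching the "Bucket size" line printed
 * by the new kern.callout_stat sysctl.
 */
#include <stdio.h>
#include <stdint.h>

#define	CC_HASH_SHIFT	8			/* same value as in the patch */

typedef int64_t sbintime_t;

static unsigned int
callout_hash(sbintime_t sbt)
{
	return ((unsigned int)(sbt >> (32 - CC_HASH_SHIFT)));
}

int
main(void)
{
	/* Hypothetical: ncallout = 511 gives callwheelsize = 1 << fls(511) = 512. */
	unsigned int callwheelsize = 512;
	unsigned int callwheelmask = callwheelsize - 1;
	sbintime_t expiry = ((sbintime_t)10 << 32) + ((sbintime_t)1 << 30);	/* 10.25 s */

	printf("expiry 10.25s -> hash %u, bucket %u of %u\n",
	    callout_hash(expiry), callout_hash(expiry) & callwheelmask,
	    callwheelsize);
	return (0);
}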
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 00024aa3..37ec0956 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -39,13 +39,18 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <rtems/bsd/local/opt_kdtrace.h> +#include <rtems/bsd/local/opt_callout_profiling.h> +#include <rtems/bsd/local/opt_ddb.h> +#if defined(__arm__) || defined(__rtems__) +#include <rtems/bsd/local/opt_timer.h> +#endif +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/bus.h> #include <sys/callout.h> -#include <sys/condvar.h> +#include <sys/file.h> #include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/ktr.h> @@ -58,19 +63,24 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/smp.h> +#ifdef DDB +#include <ddb/ddb.h> +#include <machine/_inttypes.h> +#endif + #ifdef SMP #include <machine/cpu.h> #endif -#ifdef __rtems__ -#define ncallout 16 -#endif /* __rtems__ */ +#ifndef NO_EVENTTIMERS +DPCPU_DECLARE(sbintime_t, hardclocktime); +#endif + SDT_PROVIDER_DEFINE(callout_execute); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, - "struct callout *"); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, - "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); +#ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); @@ -83,65 +93,106 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); +static int avg_depth_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, + "Average number of direct callouts examined per callout_process call. " + "Units = 1/1000"); +static int avg_lockcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, + &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + "callout_process call. Units = 1/1000"); +static int avg_mpcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, + 0, "Average number of MP direct callouts made per callout_process call. " + "Units = 1/1000"); +#endif + +#ifndef __rtems__ +static int ncallout; +SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, + "Number of entries in callwheel and size of timeout() preallocation"); +#else /* __rtems__ */ +#define ncallout 16 +#endif /* __rtems__ */ + +#ifdef RSS +static int pin_default_swi = 1; +static int pin_pcpu_swi = 1; +#else +static int pin_default_swi = 0; +static int pin_pcpu_swi = 0; +#endif + +SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, + 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); +SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, + 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); + /* * TODO: * allocate more timeout table slots when table overflows. 
*/ -int callwheelsize, callwheelbits, callwheelmask; +u_int callwheelsize, callwheelmask; /* - * The callout cpu migration entity represents informations necessary for - * describing the migrating callout to the new callout cpu. + * The callout cpu exec entities represent informations necessary for + * describing the state of callouts currently running on the CPU and the ones + * necessary for migrating callouts to the new callout cpu. In particular, + * the first entry of the array cc_exec_entity holds informations for callout + * running in SWI thread context, while the second one holds informations + * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ -struct cc_mig_ent { +struct cc_exec { + struct callout *cc_curr; + void (*cc_drain)(void *); #ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - int ce_migration_ticks; + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + sbintime_t ce_migration_time; + sbintime_t ce_migration_prec; #endif + bool cc_cancel; + bool cc_waiting; }; - + /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. - * In particular: - * cc_ticks is incremented once per tick in callout_cpu(). - * It tracks the global 'ticks' but in a way that the individual - * threads should not worry about races in the order in which - * hardclock() and hardclock_cpu() run on the various CPUs. - * cc_softclock is advanced in callout_cpu() to point to the - * first entry in cc_callwheel that may need handling. In turn, - * a softclock() is scheduled so it can serve the various entries i - * such that cc_softclock <= i <= cc_ticks . - * XXX maybe cc_softclock and cc_ticks should be volatile ? - * - * cc_ticks is also used in callout_reset_cpu() to determine - * when the callout should be served. 
*/ struct callout_cpu { - struct cc_mig_ent cc_migrating_entity; - struct mtx cc_lock; - struct callout *cc_callout; - struct callout_tailq *cc_callwheel; - struct callout_list cc_callfree; + struct mtx_padalign cc_lock; + struct cc_exec cc_exec_entity[2]; struct callout *cc_next; - struct callout *cc_curr; + struct callout *cc_callout; + struct callout_list *cc_callwheel; +#ifndef __rtems__ + struct callout_tailq cc_expireq; +#endif /* __rtems__ */ + struct callout_slist cc_callfree; + sbintime_t cc_firstevent; + sbintime_t cc_lastscan; void *cc_cookie; - int cc_ticks; - int cc_softticks; - int cc_cancel; - int cc_waiting; - int cc_firsttick; + u_int cc_bucket; + u_int cc_inited; + char cc_ktr_event_name[20]; }; +#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) + +#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr +#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain +#define cc_exec_next(cc) cc->cc_next +#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel +#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP -#define cc_migration_func cc_migrating_entity.ce_migration_func -#define cc_migration_arg cc_migrating_entity.ce_migration_arg -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu -#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks +#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func +#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg +#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu +#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time +#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU @@ -157,39 +208,49 @@ struct callout_cpu cc_cpu; #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; -void (*callout_new_inserted)(int cpu, int ticks) = NULL; + +static void callout_cpu_init(struct callout_cpu *cc, int cpu); +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, +#ifdef CALLOUT_PROFILING + int *mpcalls, int *lockcalls, int *gcalls, +#endif + int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is curr_callout. - * If curr_callout is non-NULL, threads waiting in + * cc_curr - If a callout is in progress, it is cc_curr. + * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and c_lock held + * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after - * c_lock is successfully acquired. + * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when - * curr_callout is non-NULL. + * cc_curr is non-NULL. */ /* - * Resets the migration entity tied to a specific callout cpu. + * Resets the execution entity tied to a specific callout cpu. 
*/ static void -cc_cme_cleanup(struct callout_cpu *cc) +cc_cce_cleanup(struct callout_cpu *cc, int direct) { + cc_exec_curr(cc, direct) = NULL; + cc_exec_cancel(cc, direct) = false; + cc_exec_waiting(cc, direct) = false; #ifdef SMP - cc->cc_migration_cpu = CPUBLOCK; - cc->cc_migration_ticks = 0; - cc->cc_migration_func = NULL; - cc->cc_migration_arg = NULL; + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; #endif } @@ -197,27 +258,23 @@ cc_cme_cleanup(struct callout_cpu *cc) * Checks if migration is requested by a specific callout cpu. */ static int -cc_cme_migrating(struct callout_cpu *cc) +cc_cce_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP - return (cc->cc_migration_cpu != CPUBLOCK); + return (cc_migration_cpu(cc, direct) != CPUBLOCK); #else return (0); #endif } /* - * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization - * - * This code is called very early in the kernel initialization sequence, - * and may be called more then once. + * Kernel low level callwheel initialization + * called on cpu0 during kernel startup. */ #ifdef __rtems__ static void rtems_bsd_timeout_init_early(void *); -static void callout_cpu_init(struct callout_cpu *); - static void rtems_bsd_callout_timer(rtems_id id, void *arg) { @@ -228,7 +285,7 @@ rtems_bsd_callout_timer(rtems_id id, void *arg) sc = rtems_timer_reset(id); BSD_ASSERT(sc == RTEMS_SUCCESSFUL); - callout_tick(); + callout_process(sbinuptime()); } static void @@ -253,63 +310,90 @@ SYSINIT(rtems_bsd_timeout_late, SI_SUB_LAST, SI_ORDER_FIRST, rtems_bsd_timeout_init_late, NULL); static void -rtems_bsd_timeout_init_early(void *unused) +rtems_bsd_timeout_init_early(void *dummy) #else /* __rtems__ */ -caddr_t -kern_timeout_callwheel_alloc(caddr_t v) +static void +callout_callwheel_init(void *dummy) #endif /* __rtems__ */ { struct callout_cpu *cc; #ifdef __rtems__ - caddr_t v; + (void) dummy; +#endif /* __rtems__ */ - (void) unused; + /* + * Calculate the size of the callout wheel and the preallocated + * timeout() structures. + * XXX: Clip callout to result of previous function of maxusers + * maximum 384. This is still huge, but acceptable. + */ + memset(CC_CPU(0), 0, sizeof(cc_cpu)); +#ifndef __rtems__ + ncallout = imin(16 + maxproc + maxfiles, 18508); + TUNABLE_INT_FETCH("kern.ncallout", &ncallout); #endif /* __rtems__ */ - timeout_cpu = PCPU_GET(cpuid); - cc = CC_CPU(timeout_cpu); /* - * Calculate callout wheel size + * Calculate callout wheel size, should be next power of two higher + * than 'ncallout'. */ - for (callwheelsize = 1, callwheelbits = 0; - callwheelsize < ncallout; - callwheelsize <<= 1, ++callwheelbits) - ; + callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; -#ifdef __rtems__ - v = malloc(ncallout * sizeof(*cc->cc_callout) + callwheelsize - * sizeof(*cc->cc_callwheel), M_CALLOUT, M_ZERO | M_WAITOK); -#endif /* __rtems__ */ - cc->cc_callout = (struct callout *)v; - v = (caddr_t)(cc->cc_callout + ncallout); - cc->cc_callwheel = (struct callout_tailq *)v; - v = (caddr_t)(cc->cc_callwheel + callwheelsize); #ifndef __rtems__ - return(v); -#else /* __rtems__ */ - callout_cpu_init(cc); + /* + * Fetch whether we're pinning the swi's or not. 
+ */ + TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); + TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); #endif /* __rtems__ */ + + /* + * Only cpu0 handles timeout(9) and receives a preallocation. + * + * XXX: Once all timeout(9) consumers are converted this can + * be removed. + */ + timeout_cpu = PCPU_GET(cpuid); + cc = CC_CPU(timeout_cpu); + cc->cc_callout = malloc(ncallout * sizeof(struct callout), + M_CALLOUT, M_WAITOK); + callout_cpu_init(cc, timeout_cpu); } +#ifndef __rtems__ +SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); +#endif /* __rtems__ */ +/* + * Initialize the per-cpu callout structures. + */ static void -callout_cpu_init(struct callout_cpu *cc) +callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); - for (i = 0; i < callwheelsize; i++) { - TAILQ_INIT(&cc->cc_callwheel[i]); - } - cc_cme_cleanup(cc); - if (cc->cc_callout == NULL) + cc->cc_inited = 1; + cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, + M_CALLOUT, M_WAITOK); + for (i = 0; i < callwheelsize; i++) + LIST_INIT(&cc->cc_callwheel[i]); +#ifndef __rtems__ + TAILQ_INIT(&cc->cc_expireq); +#endif /* __rtems__ */ + cc->cc_firstevent = SBT_MAX; + for (i = 0; i < 2; i++) + cc_cce_cleanup(cc, i); + snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), + "callwheel cpu %d", cpu); + if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_flags = CALLOUT_LOCAL_ALLOC; + c->c_iflags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } @@ -346,109 +430,201 @@ callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) #ifndef __rtems__ /* - * kern_timeout_callwheel_init() - initialize previously reserved callwheel - * space. - * - * This code is called just once, after the space reserved for the - * callout wheel has been finalized. - */ -void -kern_timeout_callwheel_init(void) -{ - callout_cpu_init(CC_CPU(timeout_cpu)); -} -#endif /* __rtems__ */ - -/* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; + char name[MAXCOMLEN]; #ifdef SMP int cpu; + struct intr_event *ie; #endif cc = CC_CPU(timeout_cpu); - if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, + snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); + if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); + if (pin_default_swi && + (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { + printf("%s: timeout clock couldn't be pinned to cpu %d\n", + __func__, + timeout_cpu); + } + #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); - if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, + cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ + callout_cpu_init(cc, cpu); + snprintf(name, sizeof(name), "clock (%d)", cpu); + ie = NULL; + if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); - cc->cc_callout = NULL; /* Only cpu0 handles timeout(). 
*/ - cc->cc_callwheel = malloc( - sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT, - M_WAITOK); - callout_cpu_init(cc); + if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { + printf("%s: per-cpu clock couldn't be pinned to " + "cpu %d\n", + __func__, + cpu); + } } #endif } - SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); +#endif /* __rtems__ */ + +#define CC_HASH_SHIFT 8 + +static inline u_int +callout_hash(sbintime_t sbt) +{ + + return (sbt >> (32 - CC_HASH_SHIFT)); +} + +static inline u_int +callout_get_bucket(sbintime_t sbt) +{ + + return (callout_hash(sbt) & callwheelmask); +} void -callout_tick(void) +callout_process(sbintime_t now) { + struct callout *tmp, *tmpn; struct callout_cpu *cc; - int need_softclock; - int bucket; + struct callout_list *sc; + sbintime_t first, last, max, tmp_max; + uint32_t lookahead; + u_int firstb, lastb, nowb; +#ifdef CALLOUT_PROFILING + int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; +#endif - /* - * Process callouts at a very low cpu priority, so we don't keep the - * relatively high clock interrupt priority any longer than necessary. - */ - need_softclock = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - cc->cc_firsttick = cc->cc_ticks = ticks; - for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) { - bucket = cc->cc_softticks & callwheelmask; - if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) { - need_softclock = 1; - break; - } + + /* Compute the buckets of the last scan and present times. */ + firstb = callout_hash(cc->cc_lastscan); + cc->cc_lastscan = now; + nowb = callout_hash(now); + + /* Compute the last bucket and minimum time of the bucket after it. */ + if (nowb == firstb) + lookahead = (SBT_1S / 16); + else if (nowb - firstb == 1) + lookahead = (SBT_1S / 8); + else + lookahead = (SBT_1S / 2); + first = last = now; + first += (lookahead / 2); + last += lookahead; + last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); + lastb = callout_hash(last) - 1; + max = last; + + /* + * Check if we wrapped around the entire wheel from the last scan. + * In case, we need to scan entirely the wheel for pending callouts. + */ + if (lastb - firstb >= callwheelsize) { + lastb = firstb + callwheelsize - 1; + if (nowb - firstb >= callwheelsize) + nowb = lastb; } + + /* Iterate callwheel from firstb to nowb and then up to lastb. */ + do { + sc = &cc->cc_callwheel[firstb & callwheelmask]; + tmp = LIST_FIRST(sc); + while (tmp != NULL) { + /* Run the callout if present time within allowed. */ + if (tmp->c_time <= now) { +#ifndef __rtems__ + /* + * Consumer told us the callout may be run + * directly from hardware interrupt context. + */ + if (tmp->c_iflags & CALLOUT_DIRECT) { +#endif /* __rtems__ */ +#ifdef CALLOUT_PROFILING + ++depth_dir; +#endif + cc_exec_next(cc) = + LIST_NEXT(tmp, c_links.le); + cc->cc_bucket = firstb & callwheelmask; + LIST_REMOVE(tmp, c_links.le); + softclock_call_cc(tmp, cc, +#ifdef CALLOUT_PROFILING + &mpcalls_dir, &lockcalls_dir, NULL, +#endif + 1); + tmp = cc_exec_next(cc); + cc_exec_next(cc) = NULL; +#ifndef __rtems__ + } else { + tmpn = LIST_NEXT(tmp, c_links.le); + LIST_REMOVE(tmp, c_links.le); + TAILQ_INSERT_TAIL(&cc->cc_expireq, + tmp, c_links.tqe); + tmp->c_iflags |= CALLOUT_PROCESSED; + tmp = tmpn; + } +#endif /* __rtems__ */ + continue; + } + /* Skip events from distant future. */ + if (tmp->c_time >= max) + goto next; + /* + * Event minimal time is bigger than present maximal + * time, so it cannot be aggregated. 
+ */ + if (tmp->c_time > last) { + lastb = nowb; + goto next; + } + /* Update first and last time, respecting this event. */ + if (tmp->c_time < first) + first = tmp->c_time; + tmp_max = tmp->c_time + tmp->c_precision; + if (tmp_max < last) + last = tmp_max; +next: + tmp = LIST_NEXT(tmp, c_links.le); + } + /* Proceed with the next bucket. */ + firstb++; + /* + * Stop if we looked after present time and found + * some event we can't execute at now. + * Stop if we looked far enough into the future. + */ + } while (((int)(firstb - lastb)) <= 0); + cc->cc_firstevent = last; +#ifndef NO_EVENTTIMERS + cpu_new_callout(curcpu, last, first); +#endif +#ifdef CALLOUT_PROFILING + avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; + avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; + avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; +#endif mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); +#ifndef __rtems__ /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ - if (need_softclock) + if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); -} - -int -callout_tickstofirst(int limit) -{ - struct callout_cpu *cc; - struct callout *c; - struct callout_tailq *sc; - int curticks; - int skip = 1; - - cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - curticks = cc->cc_ticks; - while( skip < ncallout && skip < limit ) { - sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; - /* search scanning ticks */ - TAILQ_FOREACH( c, sc, c_links.tqe ){ - if (c->c_time - curticks <= ncallout) - goto out; - } - skip++; - } -out: - cc->cc_firsttick = curticks + skip; - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); - return (skip); +#endif /* __rtems__ */ } static struct callout_cpu * @@ -476,169 +652,224 @@ callout_lock(struct callout *c) } static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks, - void (*func)(void *), void *arg, int cpu) +callout_cc_add(struct callout *c, struct callout_cpu *cc, + sbintime_t sbt, sbintime_t precision, void (*func)(void *), + void *arg, int cpu, int flags) { + int bucket; CC_LOCK_ASSERT(cc); - - if (to_ticks <= 0) - to_ticks = 1; + if (sbt < cc->cc_lastscan) + sbt = cc->cc_lastscan; c->c_arg = arg; - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + c->c_iflags |= CALLOUT_PENDING; + c->c_iflags &= ~CALLOUT_PROCESSED; + c->c_flags |= CALLOUT_ACTIVE; + if (flags & C_DIRECT_EXEC) + c->c_iflags |= CALLOUT_DIRECT; c->c_func = func; - c->c_time = ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], - c, c_links.tqe); - if ((c->c_time - cc->cc_firsttick) < 0 && - callout_new_inserted != NULL) { - cc->cc_firsttick = c->c_time; - (*callout_new_inserted)(cpu, - to_ticks + (ticks - cc->cc_ticks)); + c->c_time = sbt; + c->c_precision = precision; + bucket = callout_get_bucket(c->c_time); + CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", + c, (int)(c->c_precision >> 32), + (u_int)(c->c_precision & 0xffffffff)); + LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); + if (cc->cc_bucket == bucket) + cc_exec_next(cc) = c; +#ifndef NO_EVENTTIMERS + /* + * Inform the eventtimers(4) subsystem there's a new callout + * that has been inserted, but only if really required. 
+ */ + if (SBT_MAX - c->c_time < c->c_precision) + c->c_precision = SBT_MAX - c->c_time; + sbt = c->c_time + c->c_precision; + if (sbt < cc->cc_firstevent) { + cc->cc_firstevent = sbt; + cpu_new_callout(cpu, sbt, c->c_time); } +#endif } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) + if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } static void -softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, - int *lockcalls, int *gcalls) +softclock_call_cc(struct callout *c, struct callout_cpu *cc, +#ifdef CALLOUT_PROFILING + int *mpcalls, int *lockcalls, int *gcalls, +#endif + int direct) { +#ifndef __rtems__ + struct rm_priotracker tracker; +#endif /* __rtems__ */ void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; - int c_flags, sharedlock; + uintptr_t lock_status; + int c_iflags; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; - int new_cpu, new_ticks; + int flags, new_cpu; + sbintime_t new_prec, new_time; #endif -#ifdef DIAGNOSTIC - struct bintime bt1, bt2; +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbintime_t sbt1, sbt2; struct timespec ts2; - static uint64_t maxdt = 36893488147419102LL; /* 2 msec */ + static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif - KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == - (CALLOUT_PENDING | CALLOUT_ACTIVE), - ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, + ("softclock_call_cc: pend %p %x", c, c->c_iflags)); + KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, + ("softclock_call_cc: act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; - sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; + lock_status = 0; + if (c->c_flags & CALLOUT_SHAREDLOCK) { +#ifndef __rtems__ + if (class == &lock_class_rm) + lock_status = (uintptr_t)&tracker; + else +#endif /* __rtems__ */ + lock_status = 1; + } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; - c_flags = c->c_flags; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) - c->c_flags = CALLOUT_LOCAL_ALLOC; + c_iflags = c->c_iflags; + if (c->c_iflags & CALLOUT_LOCAL_ALLOC) + c->c_iflags = CALLOUT_LOCAL_ALLOC; else - c->c_flags &= ~CALLOUT_PENDING; - cc->cc_curr = c; - cc->cc_cancel = 0; + c->c_iflags &= ~CALLOUT_PENDING; + + cc_exec_curr(cc, direct) = c; + cc_exec_cancel(cc, direct) = false; + cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); if (c_lock != NULL) { - class->lc_lock(c_lock, sharedlock); + class->lc_lock(c_lock, lock_status); /* * The callout may have been cancelled * while we switched locks. */ - if (cc->cc_cancel) { + if (cc_exec_cancel(cc, direct)) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. 
*/ - cc->cc_cancel = 1; - + cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { +#ifdef CALLOUT_PROFILING (*gcalls)++; - CTR3(KTR_CALLOUT, "callout %p func %p arg %p", +#endif + CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { +#ifdef CALLOUT_PROFILING (*lockcalls)++; +#endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { +#ifdef CALLOUT_PROFILING (*mpcalls)++; - CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p", +#endif + CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } -#ifdef DIAGNOSTIC - binuptime(&bt1); + KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", + "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbt1 = sbinuptime(); #endif #ifndef __rtems__ THREAD_NO_SLEEPING(); - SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); + SDT_PROBE1(callout_execute, , , callout__start, c); #endif /* __rtems__ */ c_func(c_arg); #ifndef __rtems__ - SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); + SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #endif /* __rtems__ */ -#ifdef DIAGNOSTIC - binuptime(&bt2); - bintime_sub(&bt2, &bt1); - if (bt2.frac > maxdt) { - if (lastfunc != c_func || bt2.frac > maxdt * 2) { - bintime2timespec(&bt2, &ts2); +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbt2 = sbinuptime(); + sbt2 -= sbt1; + if (sbt2 > maxdt) { + if (lastfunc != c_func || sbt2 > maxdt * 2) { + ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } - maxdt = bt2.frac; + maxdt = sbt2; lastfunc = c_func; } #endif + KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) + if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); - KASSERT(cc->cc_curr == c, ("mishandled cc_curr")); - cc->cc_curr = NULL; - if (cc->cc_waiting) { + KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); + cc_exec_curr(cc, direct) = NULL; + if (cc_exec_drain(cc, direct)) { + void (*drain)(void *); + + drain = cc_exec_drain(cc, direct); + cc_exec_drain(cc, direct) = NULL; + CC_UNLOCK(cc); + drain(c_arg); + CC_LOCK(cc); + } + if (cc_exec_waiting(cc, direct)) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ - if (cc_cme_migrating(cc)) { - cc_cme_cleanup(cc); + if (cc_cce_migrating(cc, direct)) { + cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not * destroyed but that is not easy. */ - c->c_flags &= ~CALLOUT_DFRMIGRATION; + c->c_iflags &= ~CALLOUT_DFRMIGRATION; } - cc->cc_waiting = 0; + cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); - wakeup(&cc->cc_waiting); + wakeup(&cc_exec_waiting(cc, direct)); CC_LOCK(cc); - } else if (cc_cme_migrating(cc)) { - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, + } else if (cc_cce_migrating(cc, direct)) { + KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, ("Migrating legacy callout %p", c)); #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. 
*/ - new_cpu = cc->cc_migration_cpu; - new_ticks = cc->cc_migration_ticks; - new_func = cc->cc_migration_func; - new_arg = cc->cc_migration_arg; - cc_cme_cleanup(cc); + new_cpu = cc_migration_cpu(cc, direct); + new_time = cc_migration_time(cc, direct); + new_prec = cc_migration_prec(cc, direct); + new_func = cc_migration_func(cc, direct); + new_arg = cc_migration_arg(cc, direct); + cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not destroyed @@ -646,18 +877,19 @@ skip: * * As first thing, handle deferred callout stops. */ - if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { + if (!callout_migrating(c)) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); return; } - c->c_flags &= ~CALLOUT_DFRMIGRATION; + c->c_iflags &= ~CALLOUT_DFRMIGRATION; new_cc = callout_cpu_switch(c, cc, new_cpu); - callout_cc_add(c, new_cc, new_ticks, new_func, new_arg, - new_cpu); + flags = (direct) ? C_DIRECT_EXEC : 0; + callout_cc_add(c, new_cc, new_time, new_prec, new_func, + new_arg, new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else @@ -668,19 +900,19 @@ skip: * If the current callout is locally allocated (from * timeout(9)) then put it on the freelist. * - * Note: we need to check the cached copy of c_flags because + * Note: we need to check the cached copy of c_iflags because * if it was not local, then it's not safe to deref the * callout pointer. */ - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_flags == CALLOUT_LOCAL_ALLOC, + KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || + c->c_iflags == CALLOUT_LOCAL_ALLOC, ("corrupted callout")); - if (c_flags & CALLOUT_LOCAL_ALLOC) + if (c_iflags & CALLOUT_LOCAL_ALLOC) callout_cc_del(c, cc); } /* - * The callout mechanism is based on the work of Adam M. Costello and + * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures @@ -691,6 +923,7 @@ skip: * Austin, Texas Nov 1987. */ +#ifndef __rtems__ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. @@ -700,65 +933,32 @@ softclock(void *arg) { struct callout_cpu *cc; struct callout *c; - struct callout_tailq *bucket; - int curticks; - int steps; /* #steps since we last allowed interrupts */ - int depth; - int mpcalls; - int lockcalls; - int gcalls; - -#ifndef MAX_SOFTCLOCK_STEPS -#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ -#endif /* MAX_SOFTCLOCK_STEPS */ - - mpcalls = 0; - lockcalls = 0; - gcalls = 0; - depth = 0; - steps = 0; +#ifdef CALLOUT_PROFILING + int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; +#endif + cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while (cc->cc_softticks - 1 != cc->cc_ticks) { - /* - * cc_softticks may be modified by hard clock, so cache - * it while we work on a given bucket. - */ - curticks = cc->cc_softticks; - cc->cc_softticks++; - bucket = &cc->cc_callwheel[curticks & callwheelmask]; - c = TAILQ_FIRST(bucket); - while (c != NULL) { - depth++; - if (c->c_time != curticks) { - c = TAILQ_NEXT(c, c_links.tqe); - ++steps; - if (steps >= MAX_SOFTCLOCK_STEPS) { - cc->cc_next = c; - /* Give interrupts a chance. 
*/ - CC_UNLOCK(cc); - ; /* nothing */ - CC_LOCK(cc); - c = cc->cc_next; - steps = 0; - } - } else { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(bucket, c, c_links.tqe); - softclock_call_cc(c, cc, &mpcalls, - &lockcalls, &gcalls); - steps = 0; - c = cc->cc_next; - } - } + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, +#ifdef CALLOUT_PROFILING + &mpcalls, &lockcalls, &gcalls, +#endif + 0); +#ifdef CALLOUT_PROFILING + ++depth; +#endif } +#ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; - cc->cc_next = NULL; +#endif CC_UNLOCK(cc); } +#endif /* __rtems__ */ /* * timeout -- @@ -771,16 +971,13 @@ softclock(void *arg) * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This - * implementation differs from that one in that although an + * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle -timeout(ftn, arg, to_ticks) - timeout_t *ftn; - void *arg; - int to_ticks; +timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; @@ -802,10 +999,7 @@ timeout(ftn, arg, to_ticks) } void -untimeout(ftn, arg, handle) - timeout_t *ftn; - void *arg; - struct callout_handle handle; +untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; @@ -829,6 +1023,56 @@ callout_handle_init(struct callout_handle *handle) handle->callout = NULL; } +void +callout_when(sbintime_t sbt, sbintime_t precision, int flags, + sbintime_t *res, sbintime_t *prec_res) +{ + sbintime_t to_sbt, to_pr; + + if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { + *res = sbt; + *prec_res = precision; + return; + } + if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) + sbt = tick_sbt; + if ((flags & C_HARDCLOCK) != 0 || +#ifdef NO_EVENTTIMERS + sbt >= sbt_timethreshold) { + to_sbt = getsbinuptime(); + + /* Add safety belt for the case of hz > 1000. */ + to_sbt += tc_tick_sbt - tick_sbt; +#else + sbt >= sbt_tickthreshold) { + /* + * Obtain the time of the last hardclock() call on + * this CPU directly from the kern_clocksource.c. + * This value is per-CPU, but it is equal for all + * active ones. + */ +#ifdef __LP64__ + to_sbt = DPCPU_GET(hardclocktime); +#else + spinlock_enter(); + to_sbt = DPCPU_GET(hardclocktime); + spinlock_exit(); +#endif +#endif + if ((flags & C_HARDCLOCK) == 0) + to_sbt += tick_sbt; + } else + to_sbt = sbinuptime(); + if (SBT_MAX - to_sbt < sbt) + to_sbt = SBT_MAX; + else + to_sbt += sbt; + *res = to_sbt; + to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(flags)); + *prec_res = to_pr > precision ? to_pr : precision; +} + /* * New interface; clients allocate their own callout structures. 
* @@ -846,28 +1090,56 @@ callout_handle_init(struct callout_handle *handle) * callout_deactivate() - marks the callout as having been serviced */ int -callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), - void *arg, int cpu) +callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, + void (*ftn)(void *), void *arg, int cpu, int flags) { + sbintime_t to_sbt, precision; struct callout_cpu *cc; - int cancelled = 0; + int cancelled, direct; + int ignore_cpu=0; + + cancelled = 0; + if (cpu == -1) { + ignore_cpu = 1; + } else if ((cpu >= MAXCPU) || + ((CC_CPU(cpu))->cc_inited == 0)) { + /* Invalid CPU spec */ + panic("Invalid CPU in callout %d", cpu); + } + callout_when(sbt, prec, flags, &to_sbt, &precision); + /* + * This flag used to be added by callout_cc_add, but the + * first time you call this we could end up with the + * wrong direct flag if we don't do it before we add. + */ + if (flags & C_DIRECT_EXEC) { + direct = 1; + } else { + direct = 0; + } + KASSERT(!direct || c->c_lock == NULL, + ("%s: direct callout %p has lock", __func__, c)); + cc = callout_lock(c); /* * Don't allow migration of pre-allocated callouts lest they - * become unbalanced. + * become unbalanced or handle the case where the user does + * not care. */ - if (c->c_flags & CALLOUT_LOCAL_ALLOC) + if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || + ignore_cpu) { cpu = c->c_cpu; - cc = callout_lock(c); - if (cc->cc_curr == c) { + } + + if (cc_exec_curr(cc, direct) == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ - if (c->c_lock != NULL && !cc->cc_cancel) - cancelled = cc->cc_cancel = 1; - if (cc->cc_waiting) { + if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) + cancelled = cc_exec_cancel(cc, direct) = true; + if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. @@ -878,16 +1150,41 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), CC_UNLOCK(cc); return (cancelled); } +#ifdef SMP + if (callout_migrating(c)) { + /* + * This only occurs when a second callout_reset_sbt_on + * is made after a previous one moved it into + * deferred migration (below). Note we do *not* change + * the prev_cpu even though the previous target may + * be different. + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + cancelled = 1; + CC_UNLOCK(cc); + return (cancelled); + } +#endif } - if (c->c_flags & CALLOUT_PENDING) { - if (cc->cc_next == c) { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + if (c->c_iflags & CALLOUT_PENDING) { +#ifndef __rtems__ + if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { +#endif /* __rtems__ */ + if (cc_exec_next(cc) == c) + cc_exec_next(cc) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); +#ifndef __rtems__ + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); - +#endif /* __rtems__ */ cancelled = 1; - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + c->c_iflags &= ~ CALLOUT_PENDING; + c->c_flags &= ~ CALLOUT_ACTIVE; } #ifdef SMP @@ -897,15 +1194,34 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), * to a more appropriate moment. 
*/ if (c->c_cpu != cpu) { - if (cc->cc_curr == c) { - cc->cc_migration_cpu = cpu; - cc->cc_migration_ticks = to_ticks; - cc->cc_migration_func = ftn; - cc->cc_migration_arg = arg; - c->c_flags |= CALLOUT_DFRMIGRATION; - CTR5(KTR_CALLOUT, - "migration of %p func %p arg %p in %d to %u deferred", - c, c->c_func, c->c_arg, to_ticks, cpu); + if (cc_exec_curr(cc, direct) == c) { + /* + * Pending will have been removed since we are + * actually executing the callout on another + * CPU. That callout should be waiting on the + * lock the caller holds. If we set both + * active/and/pending after we return and the + * lock on the executing callout proceeds, it + * will then see pending is true and return. + * At the return from the actual callout execution + * the migration will occur in softclock_call_cc + * and this new callout will be placed on the + * new CPU via a call to callout_cpu_switch() which + * will get the lock on the right CPU followed + * by a call callout_cc_add() which will add it there. + * (see above in softclock_call_cc()). + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); + c->c_flags |= CALLOUT_ACTIVE; + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %d.%08x to %u deferred", + c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff), cpu); CC_UNLOCK(cc); return (cancelled); } @@ -913,9 +1229,10 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), } #endif - callout_cc_add(c, cc, to_ticks, ftn, arg, cpu); - CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d", - cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff)); CC_UNLOCK(cc); return (cancelled); @@ -937,25 +1254,26 @@ callout_schedule(struct callout *c, int to_ticks) } int -_callout_stop_safe(c, safe) - struct callout *c; - int safe; +_callout_stop_safe(struct callout *c, int flags, void (*drain)(void *)) { -#ifndef __rtems__ struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; -#else /* __rtems__ */ - struct callout_cpu *cc; - struct lock_class *class; - int use_lock; + int direct, sq_locked, use_lock; + int cancelled, not_on_a_list; +#ifdef __rtems__ + (void)old_cc; + (void)sq_locked; #endif /* __rtems__ */ + if ((flags & CS_DRAIN) != 0) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, + "calling %s", __func__); + /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. 
*/ - if (!safe && c->c_lock != NULL) { + if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { @@ -965,6 +1283,11 @@ _callout_stop_safe(c, safe) } } else use_lock = 0; + if (c->c_iflags & CALLOUT_DIRECT) { + direct = 1; + } else { + direct = 0; + } #ifndef __rtems__ sq_locked = 0; @@ -973,6 +1296,28 @@ again: #endif /* __rtems__ */ cc = callout_lock(c); + if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == + (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && + ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { + /* + * Special case where this slipped in while we + * were migrating *as* the callout is about to + * execute. The caller probably holds the lock + * the callout wants. + * + * Get rid of the migration first. Then set + * the flag that tells this code *not* to + * try to remove it from any lists (its not + * on one yet). When the callout wheel runs, + * it will ignore this callout. + */ + c->c_iflags &= ~CALLOUT_PENDING; + c->c_flags &= ~CALLOUT_ACTIVE; + not_on_a_list = 1; + } else { + not_on_a_list = 0; + } + #ifndef __rtems__ /* * If the callout was migrating while the callout cpu lock was @@ -982,7 +1327,7 @@ again: if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); - sleepq_release(&old_cc->cc_waiting); + sleepq_release(&cc_exec_waiting(old_cc, direct)); sq_locked = 0; old_cc = NULL; goto again; @@ -993,36 +1338,23 @@ again: #endif /* __rtems__ */ /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. + * If the callout is running, try to stop it or drain it. */ - if (!(c->c_flags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; - + if (cc_exec_curr(cc, direct) == c) { /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. + * Succeed we to stop it or not, we must clear the + * active flag - this is what API users expect. */ - if (cc->cc_curr != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); -#ifndef __rtems__ - if (sq_locked) - sleepq_release(&cc->cc_waiting); -#endif /* __rtems__ */ - return (0); - } + c->c_flags &= ~CALLOUT_ACTIVE; - if (safe) { + if ((flags & CS_DRAIN) != 0) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. */ - while (cc->cc_curr == c) { + while (cc_exec_curr(cc, direct) == c) { #ifndef __rtems__ /* @@ -1044,7 +1376,8 @@ again: */ if (!sq_locked) { CC_UNLOCK(cc); - sleepq_lock(&cc->cc_waiting); + sleepq_lock( + &cc_exec_waiting(cc, direct)); sq_locked = 1; old_cc = cc; goto again; @@ -1056,13 +1389,16 @@ again: * will be packed up, just let softclock() * take care of it. */ - cc->cc_waiting = 1; + cc_exec_waiting(cc, direct) = true; DROP_GIANT(); CC_UNLOCK(cc); - sleepq_add(&cc->cc_waiting, + sleepq_add( + &cc_exec_waiting(cc, direct), &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); - sleepq_wait(&cc->cc_waiting, 0); + sleepq_wait( + &cc_exec_waiting(cc, direct), + 0); sq_locked = 0; old_cc = NULL; @@ -1076,84 +1412,144 @@ again: * sleepq_set_timeout() and instead use the * RTEMS watchdog. 
*/ - cc->cc_waiting = 1; - msleep_spin(&cc->cc_waiting, &cc->cc_lock, - "codrain", 0); + cc_exec_waiting(cc, direct) = true; + msleep_spin(&cc_exec_waiting(cc, direct), + &cc->cc_lock, "codrain", 0); #endif /* __rtems__ */ } - } else if (use_lock && !cc->cc_cancel) { + } else if (use_lock && + !cc_exec_cancel(cc, direct) && (drain == NULL)) { + /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in - * softclock(). + * softclock(). This *only* works with a + * callout_stop() *not* callout_drain() or + * callout_async_drain(). */ - cc->cc_cancel = 1; + cc_exec_cancel(cc, direct) = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - KASSERT(!cc_cme_migrating(cc), + KASSERT(!cc_cce_migrating(cc, direct), ("callout wrongly scheduled for migration")); + if (callout_migrating(c)) { + c->c_iflags &= ~CALLOUT_DFRMIGRATION; +#ifdef SMP + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; +#endif + } CC_UNLOCK(cc); #ifndef __rtems__ KASSERT(!sq_locked, ("sleepqueue chain locked")); #endif /* __rtems__ */ return (1); - } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { - c->c_flags &= ~CALLOUT_DFRMIGRATION; + } else if (callout_migrating(c)) { + /* + * The callout is currently being serviced + * and the "next" callout is scheduled at + * its completion with a migration. We remove + * the migration flag so it *won't* get rescheduled, + * but we can't stop the one thats running so + * we return 0. + */ + c->c_iflags &= ~CALLOUT_DFRMIGRATION; +#ifdef SMP + /* + * We can't call cc_cce_cleanup here since + * if we do it will remove .ce_curr and + * its still running. This will prevent a + * reschedule of the callout when the + * execution completes. + */ + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; +#endif CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); + if (drain) { + cc_exec_drain(cc, direct) = drain; + } CC_UNLOCK(cc); - return (1); + return ((flags & CS_EXECUTING) != 0); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); - CC_UNLOCK(cc); + if (drain) { + cc_exec_drain(cc, direct) = drain; + } #ifndef __rtems__ KASSERT(!sq_locked, ("sleepqueue chain still locked")); #endif /* __rtems__ */ - return (0); - } + cancelled = ((flags & CS_EXECUTING) != 0); + } else + cancelled = 1; + #ifndef __rtems__ if (sq_locked) - sleepq_release(&cc->cc_waiting); + sleepq_release(&cc_exec_waiting(cc, direct)); #endif /* __rtems__ */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + if ((c->c_iflags & CALLOUT_PENDING) == 0) { + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); + /* + * For not scheduled and not executing callout return + * negative value. 
+ */ + if (cc_exec_curr(cc, direct) != c) + cancelled = -1; + CC_UNLOCK(cc); + return (cancelled); + } + + c->c_iflags &= ~CALLOUT_PENDING; + c->c_flags &= ~CALLOUT_ACTIVE; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if (cc->cc_next == c) - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); + if (not_on_a_list == 0) { +#ifndef __rtems__ + if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { +#endif /* __rtems__ */ + if (cc_exec_next(cc) == c) + cc_exec_next(cc) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); +#ifndef __rtems__ + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } +#endif /* __rtems__ */ + } callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + return (cancelled); } void -callout_init(c, mpsafe) - struct callout *c; - int mpsafe; +callout_init(struct callout *c, int mpsafe) { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; - c->c_flags = CALLOUT_RETURNUNLOCKED; + c->c_iflags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; - c->c_flags = 0; + c->c_iflags = 0; } c->c_cpu = timeout_cpu; } void -_callout_init_lock(c, lock, flags) - struct callout *c; - struct lock_object *lock; - int flags; +_callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); c->c_lock = lock; @@ -1164,7 +1560,7 @@ _callout_init_lock(c, lock, flags) KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__)); - c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); + c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } @@ -1181,12 +1577,11 @@ _callout_init_lock(c, lock, flags) * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers - * firing, which seemed independant on whether the suspend was 2 hours or + * firing, which seemed independent on whether the suspend was 2 hours or * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> */ void -adjust_timeout_calltodo(time_change) - struct timeval *time_change; +adjust_timeout_calltodo(struct timeval *time_change) { register struct callout *p; unsigned long delta_ticks; @@ -1200,11 +1595,11 @@ adjust_timeout_calltodo(time_change) if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) - delta_ticks = (time_change->tv_sec * 1000000 + - time_change->tv_usec + (tick - 1)) / tick + 1; + delta_ticks = howmany(time_change->tv_sec * 1000000 + + time_change->tv_usec, tick) + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + - (time_change->tv_usec + (tick - 1)) / tick + 1; + howmany(time_change->tv_usec, tick) + 1; else delta_ticks = LONG_MAX; @@ -1233,3 +1628,152 @@ adjust_timeout_calltodo(time_change) return; } #endif /* APM_FIXUP_CALLTODO */ + +static int +flssbt(sbintime_t sbt) +{ + + sbt += (uint64_t)sbt >> 1; + if (sizeof(long) >= sizeof(sbintime_t)) + return (flsl(sbt)); + if (sbt >= SBT_1S) + return (flsl(((uint64_t)sbt) >> 32) + 32); + return (flsl(sbt)); +} + +/* + * Dump immediate statistic snapshot of the scheduled callouts. 
+ */ +static int +sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) +{ + struct callout *tmp; + struct callout_cpu *cc; + struct callout_list *sc; + sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; + int ct[64], cpr[64], ccpbk[32]; + int error, val, i, count, tcum, pcum, maxc, c, medc; +#ifdef SMP + int cpu; +#endif + + val = 0; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + count = maxc = 0; + st = spr = maxt = maxpr = 0; + bzero(ccpbk, sizeof(ccpbk)); + bzero(ct, sizeof(ct)); + bzero(cpr, sizeof(cpr)); + now = sbinuptime(); +#ifdef SMP + CPU_FOREACH(cpu) { + cc = CC_CPU(cpu); +#else + cc = CC_CPU(timeout_cpu); +#endif + CC_LOCK(cc); + for (i = 0; i < callwheelsize; i++) { + sc = &cc->cc_callwheel[i]; + c = 0; + LIST_FOREACH(tmp, sc, c_links.le) { + c++; + t = tmp->c_time - now; + if (t < 0) + t = 0; + st += t / SBT_1US; + spr += tmp->c_precision / SBT_1US; + if (t > maxt) + maxt = t; + if (tmp->c_precision > maxpr) + maxpr = tmp->c_precision; + ct[flssbt(t)]++; + cpr[flssbt(tmp->c_precision)]++; + } + if (c > maxc) + maxc = c; + ccpbk[fls(c + c / 2)]++; + count += c; + } + CC_UNLOCK(cc); +#ifdef SMP + } +#endif + + for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) + tcum += ct[i]; + medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; + for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) + pcum += cpr[i]; + medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; + for (i = 0, c = 0; i < 32 && c < count / 2; i++) + c += ccpbk[i]; + medc = (i >= 2) ? (1 << (i - 2)) : 0; + + printf("Scheduled callouts statistic snapshot:\n"); + printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", + count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); + printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", + medc, + count / callwheelsize / mp_ncpus, + (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, + maxc); + printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", + medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, + (st / count) / 1000000, (st / count) % 1000000, + maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); + printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", + medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, + (spr / count) / 1000000, (spr / count) % 1000000, + maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); + printf(" Distribution: \tbuckets\t time\t tcum\t" + " prec\t pcum\n"); + for (i = 0, tcum = pcum = 0; i < 64; i++) { + if (ct[i] == 0 && cpr[i] == 0) + continue; + t = (i != 0) ? 
(((sbintime_t)1) << (i - 1)) : 0;
+		tcum += ct[i];
+		pcum += cpr[i];
+		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
+		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
+		    i - 1 - (32 - CC_HASH_SHIFT),
+		    ct[i], tcum, cpr[i], pcum);
+	}
+	return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, sysctl_kern_callout_stat, "I",
+    "Dump immediate statistic snapshot of the scheduled callouts");
+
+#ifdef DDB
+static void
+_show_callout(struct callout *c)
+{
+
+	db_printf("callout %p\n", c);
+#define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e);
+	db_printf("   &c_links = %p\n", &(c->c_links));
+	C_DB_PRINTF("%" PRId64,	c_time);
+	C_DB_PRINTF("%" PRId64,	c_precision);
+	C_DB_PRINTF("%p",	c_arg);
+	C_DB_PRINTF("%p",	c_func);
+	C_DB_PRINTF("%p",	c_lock);
+	C_DB_PRINTF("%#x",	c_flags);
+	C_DB_PRINTF("%#x",	c_iflags);
+	C_DB_PRINTF("%d",	c_cpu);
+#undef	C_DB_PRINTF
+}
+
+DB_SHOW_COMMAND(callout, db_show_callout)
+{
+
+	if (!have_addr) {
+		db_printf("usage: show callout <struct callout *>\n");
+		return;
+	}
+
+	_show_callout((struct callout *)addr);
+}
+#endif /* DDB */
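To close, here is an editor's sketch of how a consumer typically drives the sbintime_t-based interface this patch introduces (callout_reset_sbt_on() with a separate precision argument, and the flags consumed by callout_when()). It is illustrative only and not part of the change; the foo_* names and the 50 ms period are hypothetical, while callout_init_mtx() and callout_drain() are the usual callout(9) companions of the functions changed above.

/*
 * Illustrative usage sketch (hypothetical driver code, not from the patch):
 * a periodic 50 ms callout using the sbintime_t interface.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct foo_softc {
	struct mtx	foo_mtx;
	struct callout	foo_callout;
};

static void
foo_tick(void *arg)
{
	struct foo_softc *sc = arg;

	/* callout_init_mtx() means the handler runs with foo_mtx held. */
	mtx_assert(&sc->foo_mtx, MA_OWNED);

	/* ... periodic work ... */

	/*
	 * Re-arm 50 ms out with 5 ms of allowed slop; the precision lets
	 * callout_process() aggregate this event with its neighbours.
	 * cpu == -1 means "no CPU preference" (see callout_reset_sbt_on()).
	 */
	callout_reset_sbt_on(&sc->foo_callout, 50 * SBT_1MS, 5 * SBT_1MS,
	    foo_tick, sc, -1, 0);
}

static void
foo_start(struct foo_softc *sc)
{
	mtx_init(&sc->foo_mtx, "foo", NULL, MTX_DEF);
	callout_init_mtx(&sc->foo_callout, &sc->foo_mtx, 0);

	mtx_lock(&sc->foo_mtx);
	callout_reset_sbt_on(&sc->foo_callout, 50 * SBT_1MS, 5 * SBT_1MS,
	    foo_tick, sc, -1, 0);
	mtx_unlock(&sc->foo_mtx);
}

static void
foo_stop(struct foo_softc *sc)
{
	/*
	 * Do not hold foo_mtx here: callout_drain() may sleep waiting for
	 * a running foo_tick() to finish.
	 */
	callout_drain(&sc->foo_callout);
	mtx_destroy(&sc->foo_mtx);
}

Passing C_DIRECT_EXEC in the flags argument instead asks callout_process() to run the handler straight from hardware-interrupt context; the KASSERT added in callout_reset_sbt_on() only permits that for callouts initialized without a lock, so it cannot be combined with the callout_init_mtx() pattern shown here. C_HARDCLOCK and C_ABSOLUTE, handled in callout_when(), select hardclock-aligned and absolute deadlines respectively.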