Diffstat (limited to 'freebsd/sys/kern/kern_timeout.c')
-rw-r--r-- | freebsd/sys/kern/kern_timeout.c | 1336
1 file changed, 940 insertions, 396 deletions
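Before reading the diff below, it may help to see the bucket arithmetic the new implementation is built around. The stand-alone sketch that follows is an editor's orientation aid, not part of the change: it restates the hashing done by callout_hash()/callout_get_bucket() using the constants visible in the patch (CC_HASH_SHIFT is 8; callwheelsize is the power of two above ncallout). The ncallout value and the example timestamp are hypothetical.

/*
 * Orientation sketch (not part of the patch): how an absolute sbintime_t
 * expiry maps to a callwheel bucket.  sbintime_t is 32.32 fixed-point
 * seconds, so shifting by (32 - CC_HASH_SHIFT) makes adjacent buckets
 * 2^-8 s (about 3.9 ms) apart, matching the "Bucket size" line printed
 * by the new kern.callout_stat sysctl.
 */
#include <stdio.h>
#include <stdint.h>

#define	CC_HASH_SHIFT	8			/* same value as in the patch */

typedef int64_t sbintime_t;

static unsigned int
callout_hash(sbintime_t sbt)
{
	return ((unsigned int)(sbt >> (32 - CC_HASH_SHIFT)));
}

int
main(void)
{
	/* Hypothetical: ncallout = 511 gives callwheelsize = 1 << fls(511) = 512. */
	unsigned int callwheelsize = 512;
	unsigned int callwheelmask = callwheelsize - 1;
	sbintime_t expiry = ((sbintime_t)10 << 32) + ((sbintime_t)1 << 30);	/* 10.25 s */

	printf("expiry 10.25s -> hash %u, bucket %u of %u\n",
	    callout_hash(expiry), callout_hash(expiry) & callwheelmask,
	    callwheelsize);
	return (0);
}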
diff --git a/freebsd/sys/kern/kern_timeout.c b/freebsd/sys/kern/kern_timeout.c index 00024aa3..37ec0956 100644 --- a/freebsd/sys/kern/kern_timeout.c +++ b/freebsd/sys/kern/kern_timeout.c @@ -39,13 +39,18 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); -#include <rtems/bsd/local/opt_kdtrace.h> +#include <rtems/bsd/local/opt_callout_profiling.h> +#include <rtems/bsd/local/opt_ddb.h> +#if defined(__arm__) || defined(__rtems__) +#include <rtems/bsd/local/opt_timer.h> +#endif +#include <rtems/bsd/local/opt_rss.h> #include <rtems/bsd/sys/param.h> #include <sys/systm.h> #include <sys/bus.h> #include <sys/callout.h> -#include <sys/condvar.h> +#include <sys/file.h> #include <sys/interrupt.h> #include <sys/kernel.h> #include <sys/ktr.h> @@ -58,19 +63,24 @@ __FBSDID("$FreeBSD$"); #include <sys/sysctl.h> #include <sys/smp.h> +#ifdef DDB +#include <ddb/ddb.h> +#include <machine/_inttypes.h> +#endif + #ifdef SMP #include <machine/cpu.h> #endif -#ifdef __rtems__ -#define ncallout 16 -#endif /* __rtems__ */ +#ifndef NO_EVENTTIMERS +DPCPU_DECLARE(sbintime_t, hardclocktime); +#endif + SDT_PROVIDER_DEFINE(callout_execute); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__start, - "struct callout *"); -SDT_PROBE_DEFINE1(callout_execute, kernel, , callout__end, - "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__start, "struct callout *"); +SDT_PROBE_DEFINE1(callout_execute, , , callout__end, "struct callout *"); +#ifdef CALLOUT_PROFILING static int avg_depth; SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0, "Average number of items examined per softclock call. Units = 1/1000"); @@ -83,65 +93,106 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0, static int avg_mpcalls; SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0, "Average number of MP callouts made per softclock call. Units = 1/1000"); +static int avg_depth_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0, + "Average number of direct callouts examined per callout_process call. " + "Units = 1/1000"); +static int avg_lockcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD, + &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per " + "callout_process call. Units = 1/1000"); +static int avg_mpcalls_dir; +SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir, + 0, "Average number of MP direct callouts made per callout_process call. " + "Units = 1/1000"); +#endif + +#ifndef __rtems__ +static int ncallout; +SYSCTL_INT(_kern, OID_AUTO, ncallout, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &ncallout, 0, + "Number of entries in callwheel and size of timeout() preallocation"); +#else /* __rtems__ */ +#define ncallout 16 +#endif /* __rtems__ */ + +#ifdef RSS +static int pin_default_swi = 1; +static int pin_pcpu_swi = 1; +#else +static int pin_default_swi = 0; +static int pin_pcpu_swi = 0; +#endif + +SYSCTL_INT(_kern, OID_AUTO, pin_default_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_default_swi, + 0, "Pin the default (non-per-cpu) swi (shared with PCPU 0 swi)"); +SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_pcpu_swi, + 0, "Pin the per-CPU swis (except PCPU 0, which is also default"); + /* * TODO: * allocate more timeout table slots when table overflows. 
*/ -int callwheelsize, callwheelbits, callwheelmask; +u_int callwheelsize, callwheelmask; /* - * The callout cpu migration entity represents informations necessary for - * describing the migrating callout to the new callout cpu. + * The callout cpu exec entities represent informations necessary for + * describing the state of callouts currently running on the CPU and the ones + * necessary for migrating callouts to the new callout cpu. In particular, + * the first entry of the array cc_exec_entity holds informations for callout + * running in SWI thread context, while the second one holds informations + * for callout running directly from hardware interrupt context. * The cached informations are very important for deferring migration when * the migrating callout is already running. */ -struct cc_mig_ent { +struct cc_exec { + struct callout *cc_curr; + void (*cc_drain)(void *); #ifdef SMP - void (*ce_migration_func)(void *); - void *ce_migration_arg; - int ce_migration_cpu; - int ce_migration_ticks; + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + sbintime_t ce_migration_time; + sbintime_t ce_migration_prec; #endif + bool cc_cancel; + bool cc_waiting; }; - + /* * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. - * In particular: - * cc_ticks is incremented once per tick in callout_cpu(). - * It tracks the global 'ticks' but in a way that the individual - * threads should not worry about races in the order in which - * hardclock() and hardclock_cpu() run on the various CPUs. - * cc_softclock is advanced in callout_cpu() to point to the - * first entry in cc_callwheel that may need handling. In turn, - * a softclock() is scheduled so it can serve the various entries i - * such that cc_softclock <= i <= cc_ticks . - * XXX maybe cc_softclock and cc_ticks should be volatile ? - * - * cc_ticks is also used in callout_reset_cpu() to determine - * when the callout should be served. 
*/ struct callout_cpu { - struct cc_mig_ent cc_migrating_entity; - struct mtx cc_lock; - struct callout *cc_callout; - struct callout_tailq *cc_callwheel; - struct callout_list cc_callfree; + struct mtx_padalign cc_lock; + struct cc_exec cc_exec_entity[2]; struct callout *cc_next; - struct callout *cc_curr; + struct callout *cc_callout; + struct callout_list *cc_callwheel; +#ifndef __rtems__ + struct callout_tailq cc_expireq; +#endif /* __rtems__ */ + struct callout_slist cc_callfree; + sbintime_t cc_firstevent; + sbintime_t cc_lastscan; void *cc_cookie; - int cc_ticks; - int cc_softticks; - int cc_cancel; - int cc_waiting; - int cc_firsttick; + u_int cc_bucket; + u_int cc_inited; + char cc_ktr_event_name[20]; }; +#define callout_migrating(c) ((c)->c_iflags & CALLOUT_DFRMIGRATION) + +#define cc_exec_curr(cc, dir) cc->cc_exec_entity[dir].cc_curr +#define cc_exec_drain(cc, dir) cc->cc_exec_entity[dir].cc_drain +#define cc_exec_next(cc) cc->cc_next +#define cc_exec_cancel(cc, dir) cc->cc_exec_entity[dir].cc_cancel +#define cc_exec_waiting(cc, dir) cc->cc_exec_entity[dir].cc_waiting #ifdef SMP -#define cc_migration_func cc_migrating_entity.ce_migration_func -#define cc_migration_arg cc_migrating_entity.ce_migration_arg -#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu -#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks +#define cc_migration_func(cc, dir) cc->cc_exec_entity[dir].ce_migration_func +#define cc_migration_arg(cc, dir) cc->cc_exec_entity[dir].ce_migration_arg +#define cc_migration_cpu(cc, dir) cc->cc_exec_entity[dir].ce_migration_cpu +#define cc_migration_time(cc, dir) cc->cc_exec_entity[dir].ce_migration_time +#define cc_migration_prec(cc, dir) cc->cc_exec_entity[dir].ce_migration_prec struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU @@ -157,39 +208,49 @@ struct callout_cpu cc_cpu; #define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED) static int timeout_cpu; -void (*callout_new_inserted)(int cpu, int ticks) = NULL; + +static void callout_cpu_init(struct callout_cpu *cc, int cpu); +static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, +#ifdef CALLOUT_PROFILING + int *mpcalls, int *lockcalls, int *gcalls, +#endif + int direct); static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); /** * Locked by cc_lock: - * cc_curr - If a callout is in progress, it is curr_callout. - * If curr_callout is non-NULL, threads waiting in + * cc_curr - If a callout is in progress, it is cc_curr. + * If cc_curr is non-NULL, threads waiting in * callout_drain() will be woken up as soon as the * relevant callout completes. - * cc_cancel - Changing to 1 with both callout_lock and c_lock held + * cc_cancel - Changing to 1 with both callout_lock and cc_lock held * guarantees that the current callout will not run. * The softclock() function sets this to 0 before it * drops callout_lock to acquire c_lock, and it calls * the handler only if curr_cancelled is still 0 after - * c_lock is successfully acquired. + * cc_lock is successfully acquired. * cc_waiting - If a thread is waiting in callout_drain(), then * callout_wait is nonzero. Set only when - * curr_callout is non-NULL. + * cc_curr is non-NULL. */ /* - * Resets the migration entity tied to a specific callout cpu. + * Resets the execution entity tied to a specific callout cpu. 
*/ static void -cc_cme_cleanup(struct callout_cpu *cc) +cc_cce_cleanup(struct callout_cpu *cc, int direct) { + cc_exec_curr(cc, direct) = NULL; + cc_exec_cancel(cc, direct) = false; + cc_exec_waiting(cc, direct) = false; #ifdef SMP - cc->cc_migration_cpu = CPUBLOCK; - cc->cc_migration_ticks = 0; - cc->cc_migration_func = NULL; - cc->cc_migration_arg = NULL; + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; #endif } @@ -197,27 +258,23 @@ cc_cme_cleanup(struct callout_cpu *cc) * Checks if migration is requested by a specific callout cpu. */ static int -cc_cme_migrating(struct callout_cpu *cc) +cc_cce_migrating(struct callout_cpu *cc, int direct) { #ifdef SMP - return (cc->cc_migration_cpu != CPUBLOCK); + return (cc_migration_cpu(cc, direct) != CPUBLOCK); #else return (0); #endif } /* - * kern_timeout_callwheel_alloc() - kernel low level callwheel initialization - * - * This code is called very early in the kernel initialization sequence, - * and may be called more then once. + * Kernel low level callwheel initialization + * called on cpu0 during kernel startup. */ #ifdef __rtems__ static void rtems_bsd_timeout_init_early(void *); -static void callout_cpu_init(struct callout_cpu *); - static void rtems_bsd_callout_timer(rtems_id id, void *arg) { @@ -228,7 +285,7 @@ rtems_bsd_callout_timer(rtems_id id, void *arg) sc = rtems_timer_reset(id); BSD_ASSERT(sc == RTEMS_SUCCESSFUL); - callout_tick(); + callout_process(sbinuptime()); } static void @@ -253,63 +310,90 @@ SYSINIT(rtems_bsd_timeout_late, SI_SUB_LAST, SI_ORDER_FIRST, rtems_bsd_timeout_init_late, NULL); static void -rtems_bsd_timeout_init_early(void *unused) +rtems_bsd_timeout_init_early(void *dummy) #else /* __rtems__ */ -caddr_t -kern_timeout_callwheel_alloc(caddr_t v) +static void +callout_callwheel_init(void *dummy) #endif /* __rtems__ */ { struct callout_cpu *cc; #ifdef __rtems__ - caddr_t v; + (void) dummy; +#endif /* __rtems__ */ - (void) unused; + /* + * Calculate the size of the callout wheel and the preallocated + * timeout() structures. + * XXX: Clip callout to result of previous function of maxusers + * maximum 384. This is still huge, but acceptable. + */ + memset(CC_CPU(0), 0, sizeof(cc_cpu)); +#ifndef __rtems__ + ncallout = imin(16 + maxproc + maxfiles, 18508); + TUNABLE_INT_FETCH("kern.ncallout", &ncallout); #endif /* __rtems__ */ - timeout_cpu = PCPU_GET(cpuid); - cc = CC_CPU(timeout_cpu); /* - * Calculate callout wheel size + * Calculate callout wheel size, should be next power of two higher + * than 'ncallout'. */ - for (callwheelsize = 1, callwheelbits = 0; - callwheelsize < ncallout; - callwheelsize <<= 1, ++callwheelbits) - ; + callwheelsize = 1 << fls(ncallout); callwheelmask = callwheelsize - 1; -#ifdef __rtems__ - v = malloc(ncallout * sizeof(*cc->cc_callout) + callwheelsize - * sizeof(*cc->cc_callwheel), M_CALLOUT, M_ZERO | M_WAITOK); -#endif /* __rtems__ */ - cc->cc_callout = (struct callout *)v; - v = (caddr_t)(cc->cc_callout + ncallout); - cc->cc_callwheel = (struct callout_tailq *)v; - v = (caddr_t)(cc->cc_callwheel + callwheelsize); #ifndef __rtems__ - return(v); -#else /* __rtems__ */ - callout_cpu_init(cc); + /* + * Fetch whether we're pinning the swi's or not. 
+ */ + TUNABLE_INT_FETCH("kern.pin_default_swi", &pin_default_swi); + TUNABLE_INT_FETCH("kern.pin_pcpu_swi", &pin_pcpu_swi); #endif /* __rtems__ */ + + /* + * Only cpu0 handles timeout(9) and receives a preallocation. + * + * XXX: Once all timeout(9) consumers are converted this can + * be removed. + */ + timeout_cpu = PCPU_GET(cpuid); + cc = CC_CPU(timeout_cpu); + cc->cc_callout = malloc(ncallout * sizeof(struct callout), + M_CALLOUT, M_WAITOK); + callout_cpu_init(cc, timeout_cpu); } +#ifndef __rtems__ +SYSINIT(callwheel_init, SI_SUB_CPU, SI_ORDER_ANY, callout_callwheel_init, NULL); +#endif /* __rtems__ */ +/* + * Initialize the per-cpu callout structures. + */ static void -callout_cpu_init(struct callout_cpu *cc) +callout_cpu_init(struct callout_cpu *cc, int cpu) { struct callout *c; int i; mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE); SLIST_INIT(&cc->cc_callfree); - for (i = 0; i < callwheelsize; i++) { - TAILQ_INIT(&cc->cc_callwheel[i]); - } - cc_cme_cleanup(cc); - if (cc->cc_callout == NULL) + cc->cc_inited = 1; + cc->cc_callwheel = malloc(sizeof(struct callout_list) * callwheelsize, + M_CALLOUT, M_WAITOK); + for (i = 0; i < callwheelsize; i++) + LIST_INIT(&cc->cc_callwheel[i]); +#ifndef __rtems__ + TAILQ_INIT(&cc->cc_expireq); +#endif /* __rtems__ */ + cc->cc_firstevent = SBT_MAX; + for (i = 0; i < 2; i++) + cc_cce_cleanup(cc, i); + snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), + "callwheel cpu %d", cpu); + if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ return; for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_flags = CALLOUT_LOCAL_ALLOC; + c->c_iflags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } @@ -346,109 +430,201 @@ callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) #ifndef __rtems__ /* - * kern_timeout_callwheel_init() - initialize previously reserved callwheel - * space. - * - * This code is called just once, after the space reserved for the - * callout wheel has been finalized. - */ -void -kern_timeout_callwheel_init(void) -{ - callout_cpu_init(CC_CPU(timeout_cpu)); -} -#endif /* __rtems__ */ - -/* * Start standard softclock thread. */ static void start_softclock(void *dummy) { struct callout_cpu *cc; + char name[MAXCOMLEN]; #ifdef SMP int cpu; + struct intr_event *ie; #endif cc = CC_CPU(timeout_cpu); - if (swi_add(&clk_intr_event, "clock", softclock, cc, SWI_CLOCK, + snprintf(name, sizeof(name), "clock (%d)", timeout_cpu); + if (swi_add(&clk_intr_event, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); + if (pin_default_swi && + (intr_event_bind(clk_intr_event, timeout_cpu) != 0)) { + printf("%s: timeout clock couldn't be pinned to cpu %d\n", + __func__, + timeout_cpu); + } + #ifdef SMP CPU_FOREACH(cpu) { if (cpu == timeout_cpu) continue; cc = CC_CPU(cpu); - if (swi_add(NULL, "clock", softclock, cc, SWI_CLOCK, + cc->cc_callout = NULL; /* Only cpu0 handles timeout(9). */ + callout_cpu_init(cc, cpu); + snprintf(name, sizeof(name), "clock (%d)", cpu); + ie = NULL; + if (swi_add(&ie, name, softclock, cc, SWI_CLOCK, INTR_MPSAFE, &cc->cc_cookie)) panic("died while creating standard software ithreads"); - cc->cc_callout = NULL; /* Only cpu0 handles timeout(). 
*/ - cc->cc_callwheel = malloc( - sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT, - M_WAITOK); - callout_cpu_init(cc); + if (pin_pcpu_swi && (intr_event_bind(ie, cpu) != 0)) { + printf("%s: per-cpu clock couldn't be pinned to " + "cpu %d\n", + __func__, + cpu); + } } #endif } - SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL); +#endif /* __rtems__ */ + +#define CC_HASH_SHIFT 8 + +static inline u_int +callout_hash(sbintime_t sbt) +{ + + return (sbt >> (32 - CC_HASH_SHIFT)); +} + +static inline u_int +callout_get_bucket(sbintime_t sbt) +{ + + return (callout_hash(sbt) & callwheelmask); +} void -callout_tick(void) +callout_process(sbintime_t now) { + struct callout *tmp, *tmpn; struct callout_cpu *cc; - int need_softclock; - int bucket; + struct callout_list *sc; + sbintime_t first, last, max, tmp_max; + uint32_t lookahead; + u_int firstb, lastb, nowb; +#ifdef CALLOUT_PROFILING + int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; +#endif - /* - * Process callouts at a very low cpu priority, so we don't keep the - * relatively high clock interrupt priority any longer than necessary. - */ - need_softclock = 0; cc = CC_SELF(); mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - cc->cc_firsttick = cc->cc_ticks = ticks; - for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) { - bucket = cc->cc_softticks & callwheelmask; - if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) { - need_softclock = 1; - break; - } + + /* Compute the buckets of the last scan and present times. */ + firstb = callout_hash(cc->cc_lastscan); + cc->cc_lastscan = now; + nowb = callout_hash(now); + + /* Compute the last bucket and minimum time of the bucket after it. */ + if (nowb == firstb) + lookahead = (SBT_1S / 16); + else if (nowb - firstb == 1) + lookahead = (SBT_1S / 8); + else + lookahead = (SBT_1S / 2); + first = last = now; + first += (lookahead / 2); + last += lookahead; + last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT)); + lastb = callout_hash(last) - 1; + max = last; + + /* + * Check if we wrapped around the entire wheel from the last scan. + * In case, we need to scan entirely the wheel for pending callouts. + */ + if (lastb - firstb >= callwheelsize) { + lastb = firstb + callwheelsize - 1; + if (nowb - firstb >= callwheelsize) + nowb = lastb; } + + /* Iterate callwheel from firstb to nowb and then up to lastb. */ + do { + sc = &cc->cc_callwheel[firstb & callwheelmask]; + tmp = LIST_FIRST(sc); + while (tmp != NULL) { + /* Run the callout if present time within allowed. */ + if (tmp->c_time <= now) { +#ifndef __rtems__ + /* + * Consumer told us the callout may be run + * directly from hardware interrupt context. + */ + if (tmp->c_iflags & CALLOUT_DIRECT) { +#endif /* __rtems__ */ +#ifdef CALLOUT_PROFILING + ++depth_dir; +#endif + cc_exec_next(cc) = + LIST_NEXT(tmp, c_links.le); + cc->cc_bucket = firstb & callwheelmask; + LIST_REMOVE(tmp, c_links.le); + softclock_call_cc(tmp, cc, +#ifdef CALLOUT_PROFILING + &mpcalls_dir, &lockcalls_dir, NULL, +#endif + 1); + tmp = cc_exec_next(cc); + cc_exec_next(cc) = NULL; +#ifndef __rtems__ + } else { + tmpn = LIST_NEXT(tmp, c_links.le); + LIST_REMOVE(tmp, c_links.le); + TAILQ_INSERT_TAIL(&cc->cc_expireq, + tmp, c_links.tqe); + tmp->c_iflags |= CALLOUT_PROCESSED; + tmp = tmpn; + } +#endif /* __rtems__ */ + continue; + } + /* Skip events from distant future. */ + if (tmp->c_time >= max) + goto next; + /* + * Event minimal time is bigger than present maximal + * time, so it cannot be aggregated. 
+ */ + if (tmp->c_time > last) { + lastb = nowb; + goto next; + } + /* Update first and last time, respecting this event. */ + if (tmp->c_time < first) + first = tmp->c_time; + tmp_max = tmp->c_time + tmp->c_precision; + if (tmp_max < last) + last = tmp_max; +next: + tmp = LIST_NEXT(tmp, c_links.le); + } + /* Proceed with the next bucket. */ + firstb++; + /* + * Stop if we looked after present time and found + * some event we can't execute at now. + * Stop if we looked far enough into the future. + */ + } while (((int)(firstb - lastb)) <= 0); + cc->cc_firstevent = last; +#ifndef NO_EVENTTIMERS + cpu_new_callout(curcpu, last, first); +#endif +#ifdef CALLOUT_PROFILING + avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8; + avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; + avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; +#endif mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); +#ifndef __rtems__ /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. */ - if (need_softclock) + if (!TAILQ_EMPTY(&cc->cc_expireq)) swi_sched(cc->cc_cookie, 0); -} - -int -callout_tickstofirst(int limit) -{ - struct callout_cpu *cc; - struct callout *c; - struct callout_tailq *sc; - int curticks; - int skip = 1; - - cc = CC_SELF(); - mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); - curticks = cc->cc_ticks; - while( skip < ncallout && skip < limit ) { - sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ]; - /* search scanning ticks */ - TAILQ_FOREACH( c, sc, c_links.tqe ){ - if (c->c_time - curticks <= ncallout) - goto out; - } - skip++; - } -out: - cc->cc_firsttick = curticks + skip; - mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); - return (skip); +#endif /* __rtems__ */ } static struct callout_cpu * @@ -476,169 +652,224 @@ callout_lock(struct callout *c) } static void -callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks, - void (*func)(void *), void *arg, int cpu) +callout_cc_add(struct callout *c, struct callout_cpu *cc, + sbintime_t sbt, sbintime_t precision, void (*func)(void *), + void *arg, int cpu, int flags) { + int bucket; CC_LOCK_ASSERT(cc); - - if (to_ticks <= 0) - to_ticks = 1; + if (sbt < cc->cc_lastscan) + sbt = cc->cc_lastscan; c->c_arg = arg; - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + c->c_iflags |= CALLOUT_PENDING; + c->c_iflags &= ~CALLOUT_PROCESSED; + c->c_flags |= CALLOUT_ACTIVE; + if (flags & C_DIRECT_EXEC) + c->c_iflags |= CALLOUT_DIRECT; c->c_func = func; - c->c_time = ticks + to_ticks; - TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask], - c, c_links.tqe); - if ((c->c_time - cc->cc_firsttick) < 0 && - callout_new_inserted != NULL) { - cc->cc_firsttick = c->c_time; - (*callout_new_inserted)(cpu, - to_ticks + (ticks - cc->cc_ticks)); + c->c_time = sbt; + c->c_precision = precision; + bucket = callout_get_bucket(c->c_time); + CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", + c, (int)(c->c_precision >> 32), + (u_int)(c->c_precision & 0xffffffff)); + LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); + if (cc->cc_bucket == bucket) + cc_exec_next(cc) = c; +#ifndef NO_EVENTTIMERS + /* + * Inform the eventtimers(4) subsystem there's a new callout + * that has been inserted, but only if really required. 
+ */ + if (SBT_MAX - c->c_time < c->c_precision) + c->c_precision = SBT_MAX - c->c_time; + sbt = c->c_time + c->c_precision; + if (sbt < cc->cc_firstevent) { + cc->cc_firstevent = sbt; + cpu_new_callout(cpu, sbt, c->c_time); } +#endif } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) + if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) == 0) return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } static void -softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls, - int *lockcalls, int *gcalls) +softclock_call_cc(struct callout *c, struct callout_cpu *cc, +#ifdef CALLOUT_PROFILING + int *mpcalls, int *lockcalls, int *gcalls, +#endif + int direct) { +#ifndef __rtems__ + struct rm_priotracker tracker; +#endif /* __rtems__ */ void (*c_func)(void *); void *c_arg; struct lock_class *class; struct lock_object *c_lock; - int c_flags, sharedlock; + uintptr_t lock_status; + int c_iflags; #ifdef SMP struct callout_cpu *new_cc; void (*new_func)(void *); void *new_arg; - int new_cpu, new_ticks; + int flags, new_cpu; + sbintime_t new_prec, new_time; #endif -#ifdef DIAGNOSTIC - struct bintime bt1, bt2; +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbintime_t sbt1, sbt2; struct timespec ts2; - static uint64_t maxdt = 36893488147419102LL; /* 2 msec */ + static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */ static timeout_t *lastfunc; #endif - KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == - (CALLOUT_PENDING | CALLOUT_ACTIVE), - ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + KASSERT((c->c_iflags & CALLOUT_PENDING) == CALLOUT_PENDING, + ("softclock_call_cc: pend %p %x", c, c->c_iflags)); + KASSERT((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE, + ("softclock_call_cc: act %p %x", c, c->c_flags)); class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; - sharedlock = (c->c_flags & CALLOUT_SHAREDLOCK) ? 0 : 1; + lock_status = 0; + if (c->c_flags & CALLOUT_SHAREDLOCK) { +#ifndef __rtems__ + if (class == &lock_class_rm) + lock_status = (uintptr_t)&tracker; + else +#endif /* __rtems__ */ + lock_status = 1; + } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; - c_flags = c->c_flags; - if (c->c_flags & CALLOUT_LOCAL_ALLOC) - c->c_flags = CALLOUT_LOCAL_ALLOC; + c_iflags = c->c_iflags; + if (c->c_iflags & CALLOUT_LOCAL_ALLOC) + c->c_iflags = CALLOUT_LOCAL_ALLOC; else - c->c_flags &= ~CALLOUT_PENDING; - cc->cc_curr = c; - cc->cc_cancel = 0; + c->c_iflags &= ~CALLOUT_PENDING; + + cc_exec_curr(cc, direct) = c; + cc_exec_cancel(cc, direct) = false; + cc_exec_drain(cc, direct) = NULL; CC_UNLOCK(cc); if (c_lock != NULL) { - class->lc_lock(c_lock, sharedlock); + class->lc_lock(c_lock, lock_status); /* * The callout may have been cancelled * while we switched locks. */ - if (cc->cc_cancel) { + if (cc_exec_cancel(cc, direct)) { class->lc_unlock(c_lock); goto skip; } /* The callout cannot be stopped now. 
*/ - cc->cc_cancel = 1; - + cc_exec_cancel(cc, direct) = true; if (c_lock == &Giant.lock_object) { +#ifdef CALLOUT_PROFILING (*gcalls)++; - CTR3(KTR_CALLOUT, "callout %p func %p arg %p", +#endif + CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p", c, c_func, c_arg); } else { +#ifdef CALLOUT_PROFILING (*lockcalls)++; +#endif CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p", c, c_func, c_arg); } } else { +#ifdef CALLOUT_PROFILING (*mpcalls)++; - CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p", +#endif + CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } -#ifdef DIAGNOSTIC - binuptime(&bt1); + KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", + "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbt1 = sbinuptime(); #endif #ifndef __rtems__ THREAD_NO_SLEEPING(); - SDT_PROBE(callout_execute, kernel, , callout__start, c, 0, 0, 0, 0); + SDT_PROBE1(callout_execute, , , callout__start, c); #endif /* __rtems__ */ c_func(c_arg); #ifndef __rtems__ - SDT_PROBE(callout_execute, kernel, , callout__end, c, 0, 0, 0, 0); + SDT_PROBE1(callout_execute, , , callout__end, c); THREAD_SLEEPING_OK(); #endif /* __rtems__ */ -#ifdef DIAGNOSTIC - binuptime(&bt2); - bintime_sub(&bt2, &bt1); - if (bt2.frac > maxdt) { - if (lastfunc != c_func || bt2.frac > maxdt * 2) { - bintime2timespec(&bt2, &ts2); +#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) + sbt2 = sbinuptime(); + sbt2 -= sbt1; + if (sbt2 > maxdt) { + if (lastfunc != c_func || sbt2 > maxdt * 2) { + ts2 = sbttots(sbt2); printf( "Expensive timeout(9) function: %p(%p) %jd.%09ld s\n", c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec); } - maxdt = bt2.frac; + maxdt = sbt2; lastfunc = c_func; } #endif + KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) + if ((c_iflags & CALLOUT_RETURNUNLOCKED) == 0) class->lc_unlock(c_lock); skip: CC_LOCK(cc); - KASSERT(cc->cc_curr == c, ("mishandled cc_curr")); - cc->cc_curr = NULL; - if (cc->cc_waiting) { + KASSERT(cc_exec_curr(cc, direct) == c, ("mishandled cc_curr")); + cc_exec_curr(cc, direct) = NULL; + if (cc_exec_drain(cc, direct)) { + void (*drain)(void *); + + drain = cc_exec_drain(cc, direct); + cc_exec_drain(cc, direct) = NULL; + CC_UNLOCK(cc); + drain(c_arg); + CC_LOCK(cc); + } + if (cc_exec_waiting(cc, direct)) { /* * There is someone waiting for the * callout to complete. * If the callout was scheduled for * migration just cancel it. */ - if (cc_cme_migrating(cc)) { - cc_cme_cleanup(cc); + if (cc_cce_migrating(cc, direct)) { + cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not * destroyed but that is not easy. */ - c->c_flags &= ~CALLOUT_DFRMIGRATION; + c->c_iflags &= ~CALLOUT_DFRMIGRATION; } - cc->cc_waiting = 0; + cc_exec_waiting(cc, direct) = false; CC_UNLOCK(cc); - wakeup(&cc->cc_waiting); + wakeup(&cc_exec_waiting(cc, direct)); CC_LOCK(cc); - } else if (cc_cme_migrating(cc)) { - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, + } else if (cc_cce_migrating(cc, direct)) { + KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0, ("Migrating legacy callout %p", c)); #ifdef SMP /* * If the callout was scheduled for * migration just perform it now. 
*/ - new_cpu = cc->cc_migration_cpu; - new_ticks = cc->cc_migration_ticks; - new_func = cc->cc_migration_func; - new_arg = cc->cc_migration_arg; - cc_cme_cleanup(cc); + new_cpu = cc_migration_cpu(cc, direct); + new_time = cc_migration_time(cc, direct); + new_prec = cc_migration_prec(cc, direct); + new_func = cc_migration_func(cc, direct); + new_arg = cc_migration_arg(cc, direct); + cc_cce_cleanup(cc, direct); /* * It should be assert here that the callout is not destroyed @@ -646,18 +877,19 @@ skip: * * As first thing, handle deferred callout stops. */ - if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { + if (!callout_migrating(c)) { CTR3(KTR_CALLOUT, "deferred cancelled %p func %p arg %p", c, new_func, new_arg); callout_cc_del(c, cc); return; } - c->c_flags &= ~CALLOUT_DFRMIGRATION; + c->c_iflags &= ~CALLOUT_DFRMIGRATION; new_cc = callout_cpu_switch(c, cc, new_cpu); - callout_cc_add(c, new_cc, new_ticks, new_func, new_arg, - new_cpu); + flags = (direct) ? C_DIRECT_EXEC : 0; + callout_cc_add(c, new_cc, new_time, new_prec, new_func, + new_arg, new_cpu, flags); CC_UNLOCK(new_cc); CC_LOCK(cc); #else @@ -668,19 +900,19 @@ skip: * If the current callout is locally allocated (from * timeout(9)) then put it on the freelist. * - * Note: we need to check the cached copy of c_flags because + * Note: we need to check the cached copy of c_iflags because * if it was not local, then it's not safe to deref the * callout pointer. */ - KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || - c->c_flags == CALLOUT_LOCAL_ALLOC, + KASSERT((c_iflags & CALLOUT_LOCAL_ALLOC) == 0 || + c->c_iflags == CALLOUT_LOCAL_ALLOC, ("corrupted callout")); - if (c_flags & CALLOUT_LOCAL_ALLOC) + if (c_iflags & CALLOUT_LOCAL_ALLOC) callout_cc_del(c, cc); } /* - * The callout mechanism is based on the work of Adam M. Costello and + * The callout mechanism is based on the work of Adam M. Costello and * George Varghese, published in a technical report entitled "Redesigning * the BSD Callout and Timer Facilities" and modified slightly for inclusion * in FreeBSD by Justin T. Gibbs. The original work on the data structures @@ -691,6 +923,7 @@ skip: * Austin, Texas Nov 1987. */ +#ifndef __rtems__ /* * Software (low priority) clock interrupt. * Run periodic events from timeout queue. @@ -700,65 +933,32 @@ softclock(void *arg) { struct callout_cpu *cc; struct callout *c; - struct callout_tailq *bucket; - int curticks; - int steps; /* #steps since we last allowed interrupts */ - int depth; - int mpcalls; - int lockcalls; - int gcalls; - -#ifndef MAX_SOFTCLOCK_STEPS -#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */ -#endif /* MAX_SOFTCLOCK_STEPS */ - - mpcalls = 0; - lockcalls = 0; - gcalls = 0; - depth = 0; - steps = 0; +#ifdef CALLOUT_PROFILING + int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0; +#endif + cc = (struct callout_cpu *)arg; CC_LOCK(cc); - while (cc->cc_softticks - 1 != cc->cc_ticks) { - /* - * cc_softticks may be modified by hard clock, so cache - * it while we work on a given bucket. - */ - curticks = cc->cc_softticks; - cc->cc_softticks++; - bucket = &cc->cc_callwheel[curticks & callwheelmask]; - c = TAILQ_FIRST(bucket); - while (c != NULL) { - depth++; - if (c->c_time != curticks) { - c = TAILQ_NEXT(c, c_links.tqe); - ++steps; - if (steps >= MAX_SOFTCLOCK_STEPS) { - cc->cc_next = c; - /* Give interrupts a chance. 
*/ - CC_UNLOCK(cc); - ; /* nothing */ - CC_LOCK(cc); - c = cc->cc_next; - steps = 0; - } - } else { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(bucket, c, c_links.tqe); - softclock_call_cc(c, cc, &mpcalls, - &lockcalls, &gcalls); - steps = 0; - c = cc->cc_next; - } - } + while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + softclock_call_cc(c, cc, +#ifdef CALLOUT_PROFILING + &mpcalls, &lockcalls, &gcalls, +#endif + 0); +#ifdef CALLOUT_PROFILING + ++depth; +#endif } +#ifdef CALLOUT_PROFILING avg_depth += (depth * 1000 - avg_depth) >> 8; avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8; avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8; avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8; - cc->cc_next = NULL; +#endif CC_UNLOCK(cc); } +#endif /* __rtems__ */ /* * timeout -- @@ -771,16 +971,13 @@ softclock(void *arg) * Initialize a handle so that using it with untimeout is benign. * * See AT&T BCI Driver Reference Manual for specification. This - * implementation differs from that one in that although an + * implementation differs from that one in that although an * identification value is returned from timeout, the original * arguments to timeout as well as the identifier are used to * identify entries for untimeout. */ struct callout_handle -timeout(ftn, arg, to_ticks) - timeout_t *ftn; - void *arg; - int to_ticks; +timeout(timeout_t *ftn, void *arg, int to_ticks) { struct callout_cpu *cc; struct callout *new; @@ -802,10 +999,7 @@ timeout(ftn, arg, to_ticks) } void -untimeout(ftn, arg, handle) - timeout_t *ftn; - void *arg; - struct callout_handle handle; +untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; @@ -829,6 +1023,56 @@ callout_handle_init(struct callout_handle *handle) handle->callout = NULL; } +void +callout_when(sbintime_t sbt, sbintime_t precision, int flags, + sbintime_t *res, sbintime_t *prec_res) +{ + sbintime_t to_sbt, to_pr; + + if ((flags & (C_ABSOLUTE | C_PRECALC)) != 0) { + *res = sbt; + *prec_res = precision; + return; + } + if ((flags & C_HARDCLOCK) != 0 && sbt < tick_sbt) + sbt = tick_sbt; + if ((flags & C_HARDCLOCK) != 0 || +#ifdef NO_EVENTTIMERS + sbt >= sbt_timethreshold) { + to_sbt = getsbinuptime(); + + /* Add safety belt for the case of hz > 1000. */ + to_sbt += tc_tick_sbt - tick_sbt; +#else + sbt >= sbt_tickthreshold) { + /* + * Obtain the time of the last hardclock() call on + * this CPU directly from the kern_clocksource.c. + * This value is per-CPU, but it is equal for all + * active ones. + */ +#ifdef __LP64__ + to_sbt = DPCPU_GET(hardclocktime); +#else + spinlock_enter(); + to_sbt = DPCPU_GET(hardclocktime); + spinlock_exit(); +#endif +#endif + if ((flags & C_HARDCLOCK) == 0) + to_sbt += tick_sbt; + } else + to_sbt = sbinuptime(); + if (SBT_MAX - to_sbt < sbt) + to_sbt = SBT_MAX; + else + to_sbt += sbt; + *res = to_sbt; + to_pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(flags)); + *prec_res = to_pr > precision ? to_pr : precision; +} + /* * New interface; clients allocate their own callout structures. 
* @@ -846,28 +1090,56 @@ callout_handle_init(struct callout_handle *handle) * callout_deactivate() - marks the callout as having been serviced */ int -callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), - void *arg, int cpu) +callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t prec, + void (*ftn)(void *), void *arg, int cpu, int flags) { + sbintime_t to_sbt, precision; struct callout_cpu *cc; - int cancelled = 0; + int cancelled, direct; + int ignore_cpu=0; + + cancelled = 0; + if (cpu == -1) { + ignore_cpu = 1; + } else if ((cpu >= MAXCPU) || + ((CC_CPU(cpu))->cc_inited == 0)) { + /* Invalid CPU spec */ + panic("Invalid CPU in callout %d", cpu); + } + callout_when(sbt, prec, flags, &to_sbt, &precision); + /* + * This flag used to be added by callout_cc_add, but the + * first time you call this we could end up with the + * wrong direct flag if we don't do it before we add. + */ + if (flags & C_DIRECT_EXEC) { + direct = 1; + } else { + direct = 0; + } + KASSERT(!direct || c->c_lock == NULL, + ("%s: direct callout %p has lock", __func__, c)); + cc = callout_lock(c); /* * Don't allow migration of pre-allocated callouts lest they - * become unbalanced. + * become unbalanced or handle the case where the user does + * not care. */ - if (c->c_flags & CALLOUT_LOCAL_ALLOC) + if ((c->c_iflags & CALLOUT_LOCAL_ALLOC) || + ignore_cpu) { cpu = c->c_cpu; - cc = callout_lock(c); - if (cc->cc_curr == c) { + } + + if (cc_exec_curr(cc, direct) == c) { /* * We're being asked to reschedule a callout which is * currently in progress. If there is a lock then we * can cancel the callout if it has not really started. */ - if (c->c_lock != NULL && !cc->cc_cancel) - cancelled = cc->cc_cancel = 1; - if (cc->cc_waiting) { + if (c->c_lock != NULL && !cc_exec_cancel(cc, direct)) + cancelled = cc_exec_cancel(cc, direct) = true; + if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) { /* * Someone has called callout_drain to kill this * callout. Don't reschedule. @@ -878,16 +1150,41 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), CC_UNLOCK(cc); return (cancelled); } +#ifdef SMP + if (callout_migrating(c)) { + /* + * This only occurs when a second callout_reset_sbt_on + * is made after a previous one moved it into + * deferred migration (below). Note we do *not* change + * the prev_cpu even though the previous target may + * be different. + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + cancelled = 1; + CC_UNLOCK(cc); + return (cancelled); + } +#endif } - if (c->c_flags & CALLOUT_PENDING) { - if (cc->cc_next == c) { - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); + if (c->c_iflags & CALLOUT_PENDING) { +#ifndef __rtems__ + if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { +#endif /* __rtems__ */ + if (cc_exec_next(cc) == c) + cc_exec_next(cc) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); +#ifndef __rtems__ + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); } - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); - +#endif /* __rtems__ */ cancelled = 1; - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + c->c_iflags &= ~ CALLOUT_PENDING; + c->c_flags &= ~ CALLOUT_ACTIVE; } #ifdef SMP @@ -897,15 +1194,34 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), * to a more appropriate moment. 
*/ if (c->c_cpu != cpu) { - if (cc->cc_curr == c) { - cc->cc_migration_cpu = cpu; - cc->cc_migration_ticks = to_ticks; - cc->cc_migration_func = ftn; - cc->cc_migration_arg = arg; - c->c_flags |= CALLOUT_DFRMIGRATION; - CTR5(KTR_CALLOUT, - "migration of %p func %p arg %p in %d to %u deferred", - c, c->c_func, c->c_arg, to_ticks, cpu); + if (cc_exec_curr(cc, direct) == c) { + /* + * Pending will have been removed since we are + * actually executing the callout on another + * CPU. That callout should be waiting on the + * lock the caller holds. If we set both + * active/and/pending after we return and the + * lock on the executing callout proceeds, it + * will then see pending is true and return. + * At the return from the actual callout execution + * the migration will occur in softclock_call_cc + * and this new callout will be placed on the + * new CPU via a call to callout_cpu_switch() which + * will get the lock on the right CPU followed + * by a call callout_cc_add() which will add it there. + * (see above in softclock_call_cc()). + */ + cc_migration_cpu(cc, direct) = cpu; + cc_migration_time(cc, direct) = to_sbt; + cc_migration_prec(cc, direct) = precision; + cc_migration_func(cc, direct) = ftn; + cc_migration_arg(cc, direct) = arg; + c->c_iflags |= (CALLOUT_DFRMIGRATION | CALLOUT_PENDING); + c->c_flags |= CALLOUT_ACTIVE; + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %d.%08x to %u deferred", + c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff), cpu); CC_UNLOCK(cc); return (cancelled); } @@ -913,9 +1229,10 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *), } #endif - callout_cc_add(c, cc, to_ticks, ftn, arg, cpu); - CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d", - cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks); + callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff)); CC_UNLOCK(cc); return (cancelled); @@ -937,25 +1254,26 @@ callout_schedule(struct callout *c, int to_ticks) } int -_callout_stop_safe(c, safe) - struct callout *c; - int safe; +_callout_stop_safe(struct callout *c, int flags, void (*drain)(void *)) { -#ifndef __rtems__ struct callout_cpu *cc, *old_cc; struct lock_class *class; - int use_lock, sq_locked; -#else /* __rtems__ */ - struct callout_cpu *cc; - struct lock_class *class; - int use_lock; + int direct, sq_locked, use_lock; + int cancelled, not_on_a_list; +#ifdef __rtems__ + (void)old_cc; + (void)sq_locked; #endif /* __rtems__ */ + if ((flags & CS_DRAIN) != 0) + WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock, + "calling %s", __func__); + /* * Some old subsystems don't hold Giant while running a callout_stop(), * so just discard this check for the moment. 
*/ - if (!safe && c->c_lock != NULL) { + if ((flags & CS_DRAIN) == 0 && c->c_lock != NULL) { if (c->c_lock == &Giant.lock_object) use_lock = mtx_owned(&Giant); else { @@ -965,6 +1283,11 @@ _callout_stop_safe(c, safe) } } else use_lock = 0; + if (c->c_iflags & CALLOUT_DIRECT) { + direct = 1; + } else { + direct = 0; + } #ifndef __rtems__ sq_locked = 0; @@ -973,6 +1296,28 @@ again: #endif /* __rtems__ */ cc = callout_lock(c); + if ((c->c_iflags & (CALLOUT_DFRMIGRATION | CALLOUT_PENDING)) == + (CALLOUT_DFRMIGRATION | CALLOUT_PENDING) && + ((c->c_flags & CALLOUT_ACTIVE) == CALLOUT_ACTIVE)) { + /* + * Special case where this slipped in while we + * were migrating *as* the callout is about to + * execute. The caller probably holds the lock + * the callout wants. + * + * Get rid of the migration first. Then set + * the flag that tells this code *not* to + * try to remove it from any lists (its not + * on one yet). When the callout wheel runs, + * it will ignore this callout. + */ + c->c_iflags &= ~CALLOUT_PENDING; + c->c_flags &= ~CALLOUT_ACTIVE; + not_on_a_list = 1; + } else { + not_on_a_list = 0; + } + #ifndef __rtems__ /* * If the callout was migrating while the callout cpu lock was @@ -982,7 +1327,7 @@ again: if (sq_locked != 0 && cc != old_cc) { #ifdef SMP CC_UNLOCK(cc); - sleepq_release(&old_cc->cc_waiting); + sleepq_release(&cc_exec_waiting(old_cc, direct)); sq_locked = 0; old_cc = NULL; goto again; @@ -993,36 +1338,23 @@ again: #endif /* __rtems__ */ /* - * If the callout isn't pending, it's not on the queue, so - * don't attempt to remove it from the queue. We can try to - * stop it by other means however. + * If the callout is running, try to stop it or drain it. */ - if (!(c->c_flags & CALLOUT_PENDING)) { - c->c_flags &= ~CALLOUT_ACTIVE; - + if (cc_exec_curr(cc, direct) == c) { /* - * If it wasn't on the queue and it isn't the current - * callout, then we can't stop it, so just bail. + * Succeed we to stop it or not, we must clear the + * active flag - this is what API users expect. */ - if (cc->cc_curr != c) { - CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", - c, c->c_func, c->c_arg); - CC_UNLOCK(cc); -#ifndef __rtems__ - if (sq_locked) - sleepq_release(&cc->cc_waiting); -#endif /* __rtems__ */ - return (0); - } + c->c_flags &= ~CALLOUT_ACTIVE; - if (safe) { + if ((flags & CS_DRAIN) != 0) { /* * The current callout is running (or just * about to run) and blocking is allowed, so * just wait for the current invocation to * finish. */ - while (cc->cc_curr == c) { + while (cc_exec_curr(cc, direct) == c) { #ifndef __rtems__ /* @@ -1044,7 +1376,8 @@ again: */ if (!sq_locked) { CC_UNLOCK(cc); - sleepq_lock(&cc->cc_waiting); + sleepq_lock( + &cc_exec_waiting(cc, direct)); sq_locked = 1; old_cc = cc; goto again; @@ -1056,13 +1389,16 @@ again: * will be packed up, just let softclock() * take care of it. */ - cc->cc_waiting = 1; + cc_exec_waiting(cc, direct) = true; DROP_GIANT(); CC_UNLOCK(cc); - sleepq_add(&cc->cc_waiting, + sleepq_add( + &cc_exec_waiting(cc, direct), &cc->cc_lock.lock_object, "codrain", SLEEPQ_SLEEP, 0); - sleepq_wait(&cc->cc_waiting, 0); + sleepq_wait( + &cc_exec_waiting(cc, direct), + 0); sq_locked = 0; old_cc = NULL; @@ -1076,84 +1412,144 @@ again: * sleepq_set_timeout() and instead use the * RTEMS watchdog. 
*/ - cc->cc_waiting = 1; - msleep_spin(&cc->cc_waiting, &cc->cc_lock, - "codrain", 0); + cc_exec_waiting(cc, direct) = true; + msleep_spin(&cc_exec_waiting(cc, direct), + &cc->cc_lock, "codrain", 0); #endif /* __rtems__ */ } - } else if (use_lock && !cc->cc_cancel) { + } else if (use_lock && + !cc_exec_cancel(cc, direct) && (drain == NULL)) { + /* * The current callout is waiting for its * lock which we hold. Cancel the callout * and return. After our caller drops the * lock, the callout will be skipped in - * softclock(). + * softclock(). This *only* works with a + * callout_stop() *not* callout_drain() or + * callout_async_drain(). */ - cc->cc_cancel = 1; + cc_exec_cancel(cc, direct) = true; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - KASSERT(!cc_cme_migrating(cc), + KASSERT(!cc_cce_migrating(cc, direct), ("callout wrongly scheduled for migration")); + if (callout_migrating(c)) { + c->c_iflags &= ~CALLOUT_DFRMIGRATION; +#ifdef SMP + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; +#endif + } CC_UNLOCK(cc); #ifndef __rtems__ KASSERT(!sq_locked, ("sleepqueue chain locked")); #endif /* __rtems__ */ return (1); - } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { - c->c_flags &= ~CALLOUT_DFRMIGRATION; + } else if (callout_migrating(c)) { + /* + * The callout is currently being serviced + * and the "next" callout is scheduled at + * its completion with a migration. We remove + * the migration flag so it *won't* get rescheduled, + * but we can't stop the one thats running so + * we return 0. + */ + c->c_iflags &= ~CALLOUT_DFRMIGRATION; +#ifdef SMP + /* + * We can't call cc_cce_cleanup here since + * if we do it will remove .ce_curr and + * its still running. This will prevent a + * reschedule of the callout when the + * execution completes. + */ + cc_migration_cpu(cc, direct) = CPUBLOCK; + cc_migration_time(cc, direct) = 0; + cc_migration_prec(cc, direct) = 0; + cc_migration_func(cc, direct) = NULL; + cc_migration_arg(cc, direct) = NULL; +#endif CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", c, c->c_func, c->c_arg); + if (drain) { + cc_exec_drain(cc, direct) = drain; + } CC_UNLOCK(cc); - return (1); + return ((flags & CS_EXECUTING) != 0); } CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", c, c->c_func, c->c_arg); - CC_UNLOCK(cc); + if (drain) { + cc_exec_drain(cc, direct) = drain; + } #ifndef __rtems__ KASSERT(!sq_locked, ("sleepqueue chain still locked")); #endif /* __rtems__ */ - return (0); - } + cancelled = ((flags & CS_EXECUTING) != 0); + } else + cancelled = 1; + #ifndef __rtems__ if (sq_locked) - sleepq_release(&cc->cc_waiting); + sleepq_release(&cc_exec_waiting(cc, direct)); #endif /* __rtems__ */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + if ((c->c_iflags & CALLOUT_PENDING) == 0) { + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); + /* + * For not scheduled and not executing callout return + * negative value. 
+ */ + if (cc_exec_curr(cc, direct) != c) + cancelled = -1; + CC_UNLOCK(cc); + return (cancelled); + } + + c->c_iflags &= ~CALLOUT_PENDING; + c->c_flags &= ~CALLOUT_ACTIVE; CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); - if (cc->cc_next == c) - cc->cc_next = TAILQ_NEXT(c, c_links.tqe); - TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c, - c_links.tqe); + if (not_on_a_list == 0) { +#ifndef __rtems__ + if ((c->c_iflags & CALLOUT_PROCESSED) == 0) { +#endif /* __rtems__ */ + if (cc_exec_next(cc) == c) + cc_exec_next(cc) = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); +#ifndef __rtems__ + } else { + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + } +#endif /* __rtems__ */ + } callout_cc_del(c, cc); - CC_UNLOCK(cc); - return (1); + return (cancelled); } void -callout_init(c, mpsafe) - struct callout *c; - int mpsafe; +callout_init(struct callout *c, int mpsafe) { bzero(c, sizeof *c); if (mpsafe) { c->c_lock = NULL; - c->c_flags = CALLOUT_RETURNUNLOCKED; + c->c_iflags = CALLOUT_RETURNUNLOCKED; } else { c->c_lock = &Giant.lock_object; - c->c_flags = 0; + c->c_iflags = 0; } c->c_cpu = timeout_cpu; } void -_callout_init_lock(c, lock, flags) - struct callout *c; - struct lock_object *lock; - int flags; +_callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); c->c_lock = lock; @@ -1164,7 +1560,7 @@ _callout_init_lock(c, lock, flags) KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", __func__)); - c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); + c->c_iflags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } @@ -1181,12 +1577,11 @@ _callout_init_lock(c, lock, flags) * which set the timer can do the maintanence the timer was for as close * as possible to the originally intended time. Testing this code for a * week showed that resuming from a suspend resulted in 22 to 25 timers - * firing, which seemed independant on whether the suspend was 2 hours or + * firing, which seemed independent on whether the suspend was 2 hours or * 2 days. Your milage may vary. - Ken Key <key@cs.utk.edu> */ void -adjust_timeout_calltodo(time_change) - struct timeval *time_change; +adjust_timeout_calltodo(struct timeval *time_change) { register struct callout *p; unsigned long delta_ticks; @@ -1200,11 +1595,11 @@ adjust_timeout_calltodo(time_change) if (time_change->tv_sec < 0) return; else if (time_change->tv_sec <= LONG_MAX / 1000000) - delta_ticks = (time_change->tv_sec * 1000000 + - time_change->tv_usec + (tick - 1)) / tick + 1; + delta_ticks = howmany(time_change->tv_sec * 1000000 + + time_change->tv_usec, tick) + 1; else if (time_change->tv_sec <= LONG_MAX / hz) delta_ticks = time_change->tv_sec * hz + - (time_change->tv_usec + (tick - 1)) / tick + 1; + howmany(time_change->tv_usec, tick) + 1; else delta_ticks = LONG_MAX; @@ -1233,3 +1628,152 @@ adjust_timeout_calltodo(time_change) return; } #endif /* APM_FIXUP_CALLTODO */ + +static int +flssbt(sbintime_t sbt) +{ + + sbt += (uint64_t)sbt >> 1; + if (sizeof(long) >= sizeof(sbintime_t)) + return (flsl(sbt)); + if (sbt >= SBT_1S) + return (flsl(((uint64_t)sbt) >> 32) + 32); + return (flsl(sbt)); +} + +/* + * Dump immediate statistic snapshot of the scheduled callouts. 
+ */ +static int +sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS) +{ + struct callout *tmp; + struct callout_cpu *cc; + struct callout_list *sc; + sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t; + int ct[64], cpr[64], ccpbk[32]; + int error, val, i, count, tcum, pcum, maxc, c, medc; +#ifdef SMP + int cpu; +#endif + + val = 0; + error = sysctl_handle_int(oidp, &val, 0, req); + if (error != 0 || req->newptr == NULL) + return (error); + count = maxc = 0; + st = spr = maxt = maxpr = 0; + bzero(ccpbk, sizeof(ccpbk)); + bzero(ct, sizeof(ct)); + bzero(cpr, sizeof(cpr)); + now = sbinuptime(); +#ifdef SMP + CPU_FOREACH(cpu) { + cc = CC_CPU(cpu); +#else + cc = CC_CPU(timeout_cpu); +#endif + CC_LOCK(cc); + for (i = 0; i < callwheelsize; i++) { + sc = &cc->cc_callwheel[i]; + c = 0; + LIST_FOREACH(tmp, sc, c_links.le) { + c++; + t = tmp->c_time - now; + if (t < 0) + t = 0; + st += t / SBT_1US; + spr += tmp->c_precision / SBT_1US; + if (t > maxt) + maxt = t; + if (tmp->c_precision > maxpr) + maxpr = tmp->c_precision; + ct[flssbt(t)]++; + cpr[flssbt(tmp->c_precision)]++; + } + if (c > maxc) + maxc = c; + ccpbk[fls(c + c / 2)]++; + count += c; + } + CC_UNLOCK(cc); +#ifdef SMP + } +#endif + + for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++) + tcum += ct[i]; + medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; + for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++) + pcum += cpr[i]; + medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0; + for (i = 0, c = 0; i < 32 && c < count / 2; i++) + c += ccpbk[i]; + medc = (i >= 2) ? (1 << (i - 2)) : 0; + + printf("Scheduled callouts statistic snapshot:\n"); + printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n", + count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT); + printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n", + medc, + count / callwheelsize / mp_ncpus, + (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000, + maxc); + printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", + medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32, + (st / count) / 1000000, (st / count) % 1000000, + maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32); + printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n", + medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32, + (spr / count) / 1000000, (spr / count) % 1000000, + maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32); + printf(" Distribution: \tbuckets\t time\t tcum\t" + " prec\t pcum\n"); + for (i = 0, tcum = pcum = 0; i < 64; i++) { + if (ct[i] == 0 && cpr[i] == 0) + continue; + t = (i != 0) ? 
(((sbintime_t)1) << (i - 1)) : 0;
+		tcum += ct[i];
+		pcum += cpr[i];
+		printf("  %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
+		    t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
+		    i - 1 - (32 - CC_HASH_SHIFT),
+		    ct[i], tcum, cpr[i], pcum);
+	}
+	return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
+    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+    0, 0, sysctl_kern_callout_stat, "I",
+    "Dump immediate statistic snapshot of the scheduled callouts");
+
+#ifdef DDB
+static void
+_show_callout(struct callout *c)
+{
+
+	db_printf("callout %p\n", c);
+#define	C_DB_PRINTF(f, e)	db_printf("   %s = " f "\n", #e, c->e);
+	db_printf("   &c_links = %p\n", &(c->c_links));
+	C_DB_PRINTF("%" PRId64,	c_time);
+	C_DB_PRINTF("%" PRId64,	c_precision);
+	C_DB_PRINTF("%p",	c_arg);
+	C_DB_PRINTF("%p",	c_func);
+	C_DB_PRINTF("%p",	c_lock);
+	C_DB_PRINTF("%#x",	c_flags);
+	C_DB_PRINTF("%#x",	c_iflags);
+	C_DB_PRINTF("%d",	c_cpu);
+#undef	C_DB_PRINTF
+}
+
+DB_SHOW_COMMAND(callout, db_show_callout)
+{
+
+	if (!have_addr) {
+		db_printf("usage: show callout <struct callout *>\n");
+		return;
+	}
+
+	_show_callout((struct callout *)addr);
+}
+#endif /* DDB */
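To close, here is an editor's sketch of how a consumer typically drives the sbintime_t-based interface this patch introduces (callout_reset_sbt_on() with a separate precision argument, and the flags consumed by callout_when()). It is illustrative only and not part of the change; the foo_* names and the 50 ms period are hypothetical, while callout_init_mtx() and callout_drain() are the usual callout(9) companions of the functions changed above.

/*
 * Illustrative usage sketch (hypothetical driver code, not from the patch):
 * a periodic 50 ms callout using the sbintime_t interface.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/callout.h>

struct foo_softc {
	struct mtx	foo_mtx;
	struct callout	foo_callout;
};

static void
foo_tick(void *arg)
{
	struct foo_softc *sc = arg;

	/* callout_init_mtx() means the handler runs with foo_mtx held. */
	mtx_assert(&sc->foo_mtx, MA_OWNED);

	/* ... periodic work ... */

	/*
	 * Re-arm 50 ms out with 5 ms of allowed slop; the precision lets
	 * callout_process() aggregate this event with its neighbours.
	 * cpu == -1 means "no CPU preference" (see callout_reset_sbt_on()).
	 */
	callout_reset_sbt_on(&sc->foo_callout, 50 * SBT_1MS, 5 * SBT_1MS,
	    foo_tick, sc, -1, 0);
}

static void
foo_start(struct foo_softc *sc)
{
	mtx_init(&sc->foo_mtx, "foo", NULL, MTX_DEF);
	callout_init_mtx(&sc->foo_callout, &sc->foo_mtx, 0);

	mtx_lock(&sc->foo_mtx);
	callout_reset_sbt_on(&sc->foo_callout, 50 * SBT_1MS, 5 * SBT_1MS,
	    foo_tick, sc, -1, 0);
	mtx_unlock(&sc->foo_mtx);
}

static void
foo_stop(struct foo_softc *sc)
{
	/*
	 * Do not hold foo_mtx here: callout_drain() may sleep waiting for
	 * a running foo_tick() to finish.
	 */
	callout_drain(&sc->foo_callout);
	mtx_destroy(&sc->foo_mtx);
}

Passing C_DIRECT_EXEC in the flags argument instead asks callout_process() to run the handler straight from hardware-interrupt context; the KASSERT added in callout_reset_sbt_on() only permits that for callouts initialized without a lock, so it cannot be combined with the callout_init_mtx() pattern shown here. C_HARDCLOCK and C_ABSOLUTE, handled in callout_when(), select hardclock-aligned and absolute deadlines respectively.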