diff options
Diffstat (limited to 'cpukit/libtest/t-test-rtems-measure.c')
-rw-r--r-- | cpukit/libtest/t-test-rtems-measure.c | 837 |
1 files changed, 837 insertions, 0 deletions
diff --git a/cpukit/libtest/t-test-rtems-measure.c b/cpukit/libtest/t-test-rtems-measure.c new file mode 100644 index 0000000000..207aef7afa --- /dev/null +++ b/cpukit/libtest/t-test-rtems-measure.c @@ -0,0 +1,837 @@ +/* + * SPDX-License-Identifier: BSD-2-Clause + * + * Copyright (C) 2018 embedded brains GmbH + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE + * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR + * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF + * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS + * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN + * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE + * POSSIBILITY OF SUCH DAMAGE. + */ + +#include <t.h> + +#include <alloca.h> +#include <inttypes.h> +#include <stdlib.h> +#include <string.h> + +#include <rtems.h> + +#define WAKEUP_EVENT RTEMS_EVENT_0 + +typedef struct { + struct T_measure_runtime_context *master; + rtems_id id; + volatile unsigned int *chunk; +} load_context; + +struct T_measure_runtime_context { + T_destructor destructor; + size_t sample_count; + T_ticks *samples; + size_t cache_line_size; + size_t chunk_size; + volatile unsigned int *chunk; + rtems_id runner; + uint32_t load_count; + load_context *load_contexts; +}; + +static unsigned int +dirty_data_cache(volatile unsigned int *chunk, size_t chunk_size, + size_t cache_line_size, unsigned int token) +{ + size_t m; + size_t k; + size_t i; + + m = chunk_size / sizeof(chunk[0]); + k = cache_line_size / sizeof(chunk[0]); + + for (i = 0; i < m; i += k) { + chunk[i] = i + token; + } + + return i + token; +} + +static void +wait_for_worker(void) +{ + rtems_event_set events; + + (void)rtems_event_receive(WAKEUP_EVENT, RTEMS_EVENT_ALL | RTEMS_WAIT, + RTEMS_NO_TIMEOUT, &events); +} + +static void +wakeup_master(const T_measure_runtime_context *ctx) +{ + (void)rtems_event_send(ctx->runner, WAKEUP_EVENT); +} + +static void +suspend_worker(const load_context *lctx) +{ + (void)rtems_task_suspend(lctx->id); +} + +static void +restart_worker(const load_context *lctx) +{ + (void)rtems_task_restart(lctx->id, (rtems_task_argument)lctx); + wait_for_worker(); +} + +static void +load_worker(rtems_task_argument arg) +{ + const load_context *lctx; + T_measure_runtime_context *ctx; + unsigned int token; + volatile unsigned int *chunk; + size_t chunk_size; + size_t cache_line_size; + + lctx = (const load_context *)arg; + ctx = lctx->master; + chunk = lctx->chunk; + chunk_size = ctx->chunk_size; + cache_line_size = ctx->cache_line_size; + token = (unsigned int)rtems_get_current_processor(); + + token = dirty_data_cache(chunk, chunk_size, cache_line_size, token); + wakeup_master(ctx); + + while (true) { + token = dirty_data_cache(chunk, chunk_size, cache_line_size, + token); + } +} + +static void +destroy_worker(const T_measure_runtime_context *ctx) +{ + uint32_t load; + + for (load = 0; load < ctx->load_count; ++load) { + const load_context *lctx; + + lctx = &ctx->load_contexts[load]; + + if (lctx->chunk != ctx->chunk) { + free(RTEMS_DEVOLATILE(unsigned int *, lctx->chunk)); + } + + + if (lctx->id != 0) { + rtems_task_delete(lctx->id); + } + } +} + +static void +destroy(T_destructor *dtor) +{ + T_measure_runtime_context *ctx; + + ctx = (T_measure_runtime_context *)dtor; + destroy_worker(ctx); + free(ctx); +} + +static void * +add_offset(const volatile void *p, uintptr_t o) +{ + return (void *)((uintptr_t)p + o); +} + +static void * +align_up(const volatile void *p, uintptr_t a) +{ + return (void *)(((uintptr_t)p + a - 1) & ~(a - 1)); +} + +T_measure_runtime_context * +T_measure_runtime_create(const T_measure_runtime_config *config) +{ + T_measure_runtime_context *ctx; + size_t sample_size; + size_t cache_line_size; + size_t chunk_size; + size_t load_size; + uint32_t load_count; + bool success; + uint32_t i; +#ifdef RTEMS_SMP + cpu_set_t cpu; +#endif + + sample_size = config->sample_count * sizeof(ctx->samples[0]); + + cache_line_size = rtems_cache_get_data_line_size(); + + if (cache_line_size == 0) { + cache_line_size = 8; + } + + chunk_size = rtems_cache_get_data_cache_size(0); + + if (chunk_size == 0) { + chunk_size = cache_line_size; + } + + chunk_size *= 2; + + load_count = rtems_get_processor_count(); + load_size = load_count * sizeof(ctx->load_contexts[0]); + + ctx = malloc(sizeof(*ctx) + sample_size + load_size + chunk_size + + 2 * cache_line_size); + + if (ctx == NULL) { + return NULL; + } + + ctx->sample_count = config->sample_count; + ctx->samples = add_offset(ctx, sizeof(*ctx)); + ctx->cache_line_size = cache_line_size; + ctx->chunk_size = chunk_size; + ctx->chunk = add_offset(ctx->samples, sample_size); + ctx->runner = rtems_task_self(); + ctx->load_count = load_count; + ctx->load_contexts = add_offset(ctx->chunk, chunk_size); + ctx->samples = align_up(ctx->samples, cache_line_size); + ctx->chunk = align_up(ctx->chunk, cache_line_size); + + memset(ctx->load_contexts, 0, load_size); + success = true; + + for (i = 0; i < load_count; ++i) { + rtems_status_code sc; + rtems_id id; + load_context *lctx; +#ifdef RTEMS_SMP + rtems_task_priority priority; + rtems_id scheduler; + + sc = rtems_scheduler_ident_by_processor(i, &scheduler); + if (sc != RTEMS_SUCCESSFUL) { + continue; + } +#endif + + sc = rtems_task_create(rtems_build_name('L', 'O', 'A', 'D'), + RTEMS_MAXIMUM_PRIORITY - 1, RTEMS_MINIMUM_STACK_SIZE, + RTEMS_DEFAULT_MODES, RTEMS_DEFAULT_ATTRIBUTES, &id); + if (sc != RTEMS_SUCCESSFUL) { + success = false; + break; + } + + lctx = &ctx->load_contexts[i]; + + lctx->master = ctx; + lctx->id = id; + + lctx->chunk = malloc(chunk_size); + if (lctx->chunk == NULL) { + lctx->chunk = ctx->chunk; + } + +#ifdef RTEMS_SMP + (void)rtems_scheduler_get_maximum_priority(scheduler, &priority); + (void)rtems_task_set_scheduler(id, scheduler, priority - 1); + + CPU_ZERO(&cpu); + CPU_SET((int)i, &cpu); + (void)rtems_task_set_affinity(id, sizeof(cpu), &cpu); +#endif + + (void)rtems_task_start(id, load_worker, + (rtems_task_argument)lctx); + + wait_for_worker(); + suspend_worker(lctx); + } + + if (success) { +#ifdef RTEMS_SMP + CPU_ZERO(&cpu); + CPU_SET(0, &cpu); + (void)rtems_task_set_affinity(RTEMS_SELF, sizeof(cpu), &cpu); +#endif + } else { + destroy(&ctx->destructor); + return NULL; + } + + T_add_destructor(&ctx->destructor, destroy); + return ctx; +} + +static int +cmp(const void *ap, const void *bp) +{ + T_ticks a; + T_ticks b; + + a = *(const T_ticks *)ap; + b = *(const T_ticks *)bp; + + if (a < b) { + return -1; + } else if (a > b) { + return 1; + } else { + return 0; + } +} + +static void +measure_variant_begin(const char *name, const char *variant) +{ + T_printf("M:B:%s\n", name); + T_printf("M:V:%s\n", variant); +} + +static T_time +accumulate(const T_ticks *samples, size_t sample_count) +{ + T_time a; + size_t i; + + a = 0; + + for (i = 0; i < sample_count; ++i) { + a += T_ticks_to_time(samples[i]); + } + + return a; +} + +static T_ticks +median_absolute_deviation(T_ticks *samples, size_t sample_count) +{ + T_ticks median; + size_t i; + + median = samples[sample_count / 2]; + + for (i = 0; i < sample_count / 2; ++i) { + samples[i] = median - samples[i]; + } + + for (; i < sample_count; ++i) { + samples[i] = samples[i] - median; + } + + qsort(samples, sample_count, sizeof(samples[0]), cmp); + return samples[sample_count / 2]; +} + +static void +report_sorted_samples(const T_measure_runtime_context *ctx) +{ + size_t sample_count; + const T_ticks *samples; + T_time_string ts; + T_ticks last; + T_ticks v; + size_t count; + size_t i; + + sample_count = ctx->sample_count; + samples = ctx->samples; + last = 0; + --last; + count = 0; + + for (i = 0; i < sample_count; ++i) { + v = samples[i]; + ++count; + + if (v != last) { + uint32_t sa; + uint32_t sb; + uint32_t nsa; + uint32_t nsb; + T_time t; + + T_time_to_seconds_and_nanoseconds(T_ticks_to_time(last), + &sa, &nsa); + t = T_ticks_to_time(v); + T_time_to_seconds_and_nanoseconds(t, &sb, &nsb); + + if (sa != sb || nsa != nsb) { + T_printf("M:S:%zu:%s\n", count, + T_time_to_string_ns(t, ts)); + count = 0; + } + + last = v; + } + } + + if (count > 0) { + T_printf("M:S:%zu:%s\n", count, + T_ticks_to_string_ns(last, ts)); + } +} + +static void +measure_variant_end(const T_measure_runtime_context *ctx, + const T_measure_runtime_request *req, T_time begin) +{ + size_t sample_count; + T_ticks *samples; + T_time_string ts; + T_time d; + T_ticks v; + T_time a; + + sample_count = ctx->sample_count; + samples = ctx->samples; + d = T_now() - begin; + a = accumulate(samples, sample_count); + qsort(samples, sample_count, sizeof(samples[0]), cmp); + T_printf("M:N:%zu\n", sample_count); + + if ((req->flags & T_MEASURE_RUNTIME_REPORT_SAMPLES) != 0) { + report_sorted_samples(ctx); + } + + v = samples[0]; + T_printf("M:MI:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[(1 * sample_count) / 100]; + T_printf("M:P1:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[(1 * sample_count) / 4]; + T_printf("M:Q1:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[sample_count / 2]; + T_printf("M:Q2:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[(3 * sample_count) / 4]; + T_printf("M:Q3:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[(99 * sample_count) / 100]; + T_printf("M:P99:%s\n", T_ticks_to_string_ns(v, ts)); + v = samples[sample_count - 1]; + T_printf("M:MX:%s\n", T_ticks_to_string_ns(v, ts)); + v = median_absolute_deviation(samples, sample_count); + T_printf("M:MAD:%s\n", T_ticks_to_string_ns(v, ts)); + T_printf("M:D:%s\n", T_time_to_string_ns(a, ts)); + T_printf("M:E:%s:D:%s\n", req->name, T_time_to_string_ns(d, ts)); +} + +static void +fill_data_cache(volatile unsigned int *chunk, size_t chunk_size, + size_t cache_line_size) +{ + size_t m; + size_t k; + size_t i; + + m = chunk_size / sizeof(chunk[0]); + k = cache_line_size / sizeof(chunk[0]); + + for (i = 0; i < m; i += k) { + chunk[i]; + } +} + +static void +dirty_call(void (*body)(void *), void *arg) +{ + void *space; + + /* Ensure that we use an untouched stack area */ + space = alloca(1024); + RTEMS_OBFUSCATE_VARIABLE(space); + + (*body)(arg); +} + +static void +setup(const T_measure_runtime_request *req, void *arg) +{ + if (req->setup != NULL) { + (*req->setup)(arg); + } +} + +static bool +teardown(const T_measure_runtime_request *req, void *arg, T_ticks *delta, + uint32_t tic, uint32_t toc, unsigned int retry, + unsigned int maximum_retries) +{ + if (req->teardown == NULL) { + return tic == toc || retry >= maximum_retries; + } + + return (*req->teardown)(arg, delta, tic, toc, retry); +} + +static unsigned int +get_maximum_retries(const T_measure_runtime_request *req) +{ + return (req->flags & T_MEASURE_RUNTIME_ALLOW_CLOCK_ISR) != 0 ? 1 : 0; +} + +static void +measure_valid_cache(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req) +{ + size_t sample_count; + T_ticks *samples; + void (*body)(void *); + void *arg; + size_t i; + T_time begin; + + measure_variant_begin(req->name, "ValidCache"); + begin = T_now(); + sample_count = ctx->sample_count; + samples = ctx->samples; + body = req->body; + arg = req->arg; + + for (i = 0; i < sample_count; ++i) { + unsigned int maximum_retries; + unsigned int retry; + + maximum_retries = get_maximum_retries(req); + retry = 0; + + while (true) { + rtems_interval tic; + rtems_interval toc; + T_ticks t0; + T_ticks t1; + + setup(req, arg); + fill_data_cache(ctx->chunk, ctx->chunk_size, + ctx->cache_line_size); + + tic = rtems_clock_get_ticks_since_boot(); + t0 = T_tick(); + (*body)(arg); + t1 = T_tick(); + toc = rtems_clock_get_ticks_since_boot(); + samples[i] = t1 - t0; + + if (teardown(req, arg, &samples[i], tic, toc, retry, + maximum_retries)) { + break; + } + + ++retry; + } + } + + measure_variant_end(ctx, req, begin); +} + +static void +measure_hot_cache(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req) +{ + size_t sample_count; + T_ticks *samples; + void (*body)(void *); + void *arg; + size_t i; + T_time begin; + + measure_variant_begin(req->name, "HotCache"); + begin = T_now(); + sample_count = ctx->sample_count; + samples = ctx->samples; + body = req->body; + arg = req->arg; + + for (i = 0; i < sample_count; ++i) { + unsigned int maximum_retries; + unsigned int retry; + + maximum_retries = get_maximum_retries(req); + retry = 0; + + while (true) { + rtems_interval tic; + rtems_interval toc; + T_ticks t0; + T_ticks t1; + + setup(req, arg); + + tic = rtems_clock_get_ticks_since_boot(); + t0 = T_tick(); + (*body)(arg); + t1 = T_tick(); + toc = rtems_clock_get_ticks_since_boot(); + samples[i] = t1 - t0; + + (void)teardown(req, arg, &samples[i], tic, toc, retry, + 0); + setup(req, arg); + + tic = rtems_clock_get_ticks_since_boot(); + t0 = T_tick(); + (*body)(arg); + t1 = T_tick(); + toc = rtems_clock_get_ticks_since_boot(); + samples[i] = t1 - t0; + + if (teardown(req, arg, &samples[i], tic, toc, retry, + maximum_retries)) { + break; + } + + ++retry; + } + } + + measure_variant_end(ctx, req, begin); +} + +static void +measure_dirty_cache(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req) +{ + size_t sample_count; + T_ticks *samples; + void (*body)(void *); + void *arg; + size_t i; + T_time begin; + size_t token; + + measure_variant_begin(req->name, "DirtyCache"); + begin = T_now(); + sample_count = ctx->sample_count; + samples = ctx->samples; + body = req->body; + arg = req->arg; + token = 0; + + for (i = 0; i < sample_count; ++i) { + unsigned int maximum_retries; + unsigned int retry; + + maximum_retries = get_maximum_retries(req); + retry = 0; + + while (true) { + rtems_interval tic; + rtems_interval toc; + T_ticks t0; + T_ticks t1; + + setup(req, arg); + token = dirty_data_cache(ctx->chunk, ctx->chunk_size, + ctx->cache_line_size, token); + rtems_cache_invalidate_entire_instruction(); + + tic = rtems_clock_get_ticks_since_boot(); + t0 = T_tick(); + dirty_call(body, arg); + t1 = T_tick(); + toc = rtems_clock_get_ticks_since_boot(); + samples[i] = t1 - t0; + + if (teardown(req, arg, &samples[i], tic, toc, retry, + maximum_retries)) { + break; + } + + ++retry; + } + } + + measure_variant_end(ctx, req, begin); +} + +#ifdef __sparc__ +/* + * Use recursive function calls to wake sure that we cause window overflow + * traps in the body. Try to make it hard for the compiler to optimize the + * recursive function away. + */ +static T_ticks +recursive_load_call(void (*body)(void *), void *arg, int n) +{ + T_ticks delta; + + RTEMS_OBFUSCATE_VARIABLE(n); + + if (n > 0) { + delta = recursive_load_call(body, arg, n - 1); + } else { + T_ticks t0; + T_ticks t1; + + t0 = T_tick(); + dirty_call(body, arg); + t1 = T_tick(); + + delta = t1 - t0; + } + + RTEMS_OBFUSCATE_VARIABLE(delta); + return delta; +} +#else +static T_ticks +load_call(void (*body)(void *), void *arg) +{ + T_ticks t0; + T_ticks t1; + + t0 = T_tick(); + dirty_call(body, arg); + t1 = T_tick(); + + return t1 - t0; +} +#endif + +static void +measure_load_variant(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req, + const load_context *lctx, uint32_t load) +{ + size_t sample_count; + T_ticks *samples; + void (*body)(void *); + void *arg; + size_t i; + T_time begin; + size_t token; + + measure_variant_begin(req->name, "Load"); + T_printf("M:L:%" PRIu32 "\n", load + 1); + begin = T_now(); + sample_count = ctx->sample_count; + samples = ctx->samples; + body = req->body; + arg = req->arg; + token = 0; + + restart_worker(lctx); + + for (i = 0; i < sample_count; ++i) { + unsigned int maximum_retries; + unsigned int retry; + + maximum_retries = get_maximum_retries(req); + retry = 0; + + while (true) { + rtems_interval tic; + rtems_interval toc; + T_ticks delta; + + setup(req, arg); + token = dirty_data_cache(ctx->chunk, ctx->chunk_size, + ctx->cache_line_size, token); + rtems_cache_invalidate_entire_instruction(); + + tic = rtems_clock_get_ticks_since_boot(); +#ifdef __sparc__ + delta = recursive_load_call(body, arg, + SPARC_NUMBER_OF_REGISTER_WINDOWS - 3); +#else + delta = load_call(body, arg); +#endif + toc = rtems_clock_get_ticks_since_boot(); + samples[i] = delta; + + if (teardown(req, arg, &samples[i], tic, toc, retry, + maximum_retries)) { + break; + } + + ++retry; + } + } + + measure_variant_end(ctx, req, begin); +} + +static void +measure_load(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req) +{ + const load_context *lctx; + uint32_t load; + +#ifdef RTEMS_SMP + for (load = 0; load < ctx->load_count - 1; ++load) { + lctx = &ctx->load_contexts[load]; + + if (lctx->id != 0) { + if ((req->flags & + T_MEASURE_RUNTIME_DISABLE_MINOR_LOAD) == 0) { + measure_load_variant(ctx, req, lctx, load); + } else { + restart_worker(lctx); + } + } + } +#endif + + if ((req->flags & T_MEASURE_RUNTIME_DISABLE_MAX_LOAD) == 0) { + load = ctx->load_count - 1; + lctx = &ctx->load_contexts[load]; + + if (lctx->id != 0) { + measure_load_variant(ctx, req, lctx, load); + } + } + + for (load = 0; load < ctx->load_count; ++load) { + lctx = &ctx->load_contexts[load]; + + if (lctx->id != 0) { + suspend_worker(lctx); + } + } +} + +void +T_measure_runtime(T_measure_runtime_context *ctx, + const T_measure_runtime_request *req) +{ + /* + * Do ValidCache variant before HotCache to get a good overall cache + * state for the HotCache variant. + */ + if ((req->flags & T_MEASURE_RUNTIME_DISABLE_VALID_CACHE) == 0) { + measure_valid_cache(ctx, req); + } + + if ((req->flags & T_MEASURE_RUNTIME_DISABLE_HOT_CACHE) == 0) { + measure_hot_cache(ctx, req); + } + + if ((req->flags & T_MEASURE_RUNTIME_DISABLE_DIRTY_CACHE) == 0) { + measure_dirty_cache(ctx, req); + } + + measure_load(ctx, req); +} |