summaryrefslogtreecommitdiff
path: root/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c
diff options
context:
space:
mode:
Diffstat (limited to 'testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c')
-rw-r--r--testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c495
1 files changed, 495 insertions, 0 deletions
diff --git a/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c b/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c
new file mode 100644
index 0000000000..dcaa534c6c
--- /dev/null
+++ b/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/*
+ * Copyright (C) 2022 Critical Software SA
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <rtems.h>
+#include <rtems/bspIo.h>
+#include <string.h>
+#include "../shared/utils.h"
+#include "../shared/isvv_rtems_aux.h"
+#include "../shared/low_level_utils.h"
+
+
+/**
+ *
+ * @brief Tests impact performing of L2 cache split transactions
+ *
+ * Step 2: Multiprocessor with L2 cache split transactions enabled
+ *
+ * There is one set of math equations processed:
+ * - named "filter_simulation"
+ *
+ * The equations for "filter_simulation" are used by TEST_PROCESSORS tasks that
+ * processes small sections of a "data array" in parallel.
+ *
+ * The locations in memory for acessing the small sections of the "data arrays" do
+ * overlap in terms of cache memory positions, and when acessing one section of the
+ * "data array", or even a smaller part of one, other sections of that "data array"
+ * should be kicked out of the cache. Sometimes that may happen within the same task.
+ *
+ * With the "L2 cache split transactions" enabled, everytime a data word is required
+ * by the CPU from L2 and it may happen that the L2 cache is busy retrieving other other data
+ * from the main memory, and the data word would need to wait for the other transaction to
+ * finish if L2 cache split transactions enabled.
+ *
+ * With L2 cache split transactions enabled, L2 cache can issue a split transactation to
+ * the other data transaction, and becomes free to attend the data word that was required by CPU.
+ * For the other data transaction, the master of the bus that requested it must issue a Retry
+ * transaction.
+ *
+ * Expected Results:
+ * - The Tiles must be processed only once.
+ * - "Ouput Data Result Value" must match in the Uniprocessor version with the
+ * Multiprocessor version with L2 cache split transactions disabled.
+ * - Elapsed Time should be lower than the Uniprocessor version, and slightly lower
+ * than Multiprocessor version with L2 cache split transactions disabled.
+ * - The number of AHB Splits transactions reported must be significant
+ *
+ */
+
+/**
+ *
+ * For standalone tests in the actual hardware boards the following options can be used:
+ *
+ * 1) use make XFLAGS="-Dgr740 -DGR740_ESA_BOARD"
+ * 2) declare #define GR740_ESA_BOARD at the beginning of this file
+ *
+ */
+
+#define MAX_TLS_SIZE RTEMS_ALIGN_UP(64, RTEMS_TASK_STORAGE_ALIGNMENT)
+
+#define TASK_ATTRIBUTES RTEMS_FLOATING_POINT
+
+#define TASK_STORAGE_SIZE \
+ RTEMS_TASK_STORAGE_SIZE( \
+ MAX_TLS_SIZE + RTEMS_MINIMUM_STACK_SIZE, \
+ TASK_ATTRIBUTES)
+
+// test specific global vars
+#define TASK_COUNT TEST_PROCESSORS
+#define TOTAL_TILES 64
+
+#define MAX_MESSAGE_QUEUES 5
+#define MAX_MESSAGE_SIZE sizeof(uint8_t)
+#define MAX_PENDING_MESSAGES 10
+
+rtems_event_set event_send[4] = {RTEMS_EVENT_1,
+ RTEMS_EVENT_2,
+ RTEMS_EVENT_3,
+ RTEMS_EVENT_4};
+
+uint8_t count_process[TOTAL_TILES];
+
+typedef struct
+{
+ rtems_id main_task;
+ uint8_t ntiles;
+ uint8_t next_tile;
+ rtems_id task_id[TASK_COUNT];
+ rtems_id tile_queue;
+ rtems_id message_queue[TASK_COUNT];
+ rtems_id mutex_id;
+ uint64_t accxL2;
+} test_context;
+
+RTEMS_ALIGNED(RTEMS_TASK_STORAGE_ALIGNMENT)
+static char calc_task_storage[TASK_COUNT][TASK_STORAGE_SIZE];
+
+RTEMS_MESSAGE_QUEUE_BUFFER(MAX_MESSAGE_SIZE)
+msg_tile_queue_storage[MAX_PENDING_MESSAGES];
+
+RTEMS_MESSAGE_QUEUE_BUFFER(MAX_MESSAGE_SIZE)
+msg_task_queue_storage[TASK_COUNT][MAX_PENDING_MESSAGES];
+
+
+#define ITOA_STR_SIZE (8*sizeof(int)+1)
+
+//-----------------------------------------------------------------------------------------
+#define L2_CACHE_SIZE (4U*512U*1024U) // 2Mbytes
+#define L2_CACHE_WAY_SIZE (512U*1024U) // 512kbytes
+#define xL2_ELEM (8*L2_CACHE_SIZE/sizeof(uint32_t)) // 4M elements
+
+#ifdef GR740_ESA_BOARD
+RTEMS_ALIGNED(L2_CACHE_SIZE)
+#endif
+static uint32_t xL2[xL2_ELEM];
+
+//-----------------------------------------------------------------------------------------
+#define FILTER_TAPS (16U)
+#define COEFS_SIZE (128U)
+const uint32_t coefs[COEFS_SIZE] =
+ { 29, 31, 37, 41, 43, 47, 53, 59,
+ 61, 67, 71, 73, 79, 83, 89, 97,
+ 101, 103, 107, 109, 113, 127, 131, 137,
+ 139, 149, 151, 157, 163, 167, 173, 179,
+ 181, 191, 193, 197, 199, 211, 223, 227,
+ 229, 233, 239, 241, 251, 257, 263, 269,
+ 271, 277, 281, 283, 293, 307, 311, 313,
+ 317, 331, 337, 347, 349, 353, 359, 367,
+ 373, 379, 383, 389, 397, 401, 409, 419,
+ 421, 431, 433, 439, 443, 449, 457, 461,
+ 463, 467, 479, 487, 491, 499, 503, 509,
+ 521, 523, 541, 547, 557, 563, 569, 571,
+ 577, 587, 593, 599, 601, 607, 613, 617,
+ 619, 631, 641, 643, 647, 653, 659, 661,
+ 673, 677, 683, 691, 701, 709, 719, 727,
+ 733, 739, 743, 751, 757, 761, 769, 773};
+
+static void fill_main_memory_with_data(void){
+ // Store to memory
+ for ( uint32_t j = 0 ; j < xL2_ELEM; j ++)
+ xL2[j] = j;
+}
+
+static uint64_t warmup_caches(void){
+ uint64_t acc = 0;
+ for ( uint32_t j = 0 ; j < xL2_ELEM; j++)
+ acc += xL2[j];
+ return acc;
+}
+
+static uint64_t calc_filter_simulation_equation(uint8_t tile, uint32_t total_elems){
+ uint64_t acc = 0;
+ const uint32_t begin_idx = tile*xL2_ELEM/total_elems;
+ const uint32_t end_idx = begin_idx + (xL2_ELEM/total_elems) - 1;
+
+ for ( uint32_t j = begin_idx ; j <= end_idx; j ++) {
+ uint32_t i;
+
+ // Simulating filtering/convolution
+ uint32_t t1 = 0, t2 = 0, t3 = 0, t4 = 0;
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t1 += xL2[(j+i) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t1 += xL2[(j+i+xL2_ELEM*1/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t1 += xL2[(j+i+xL2_ELEM*2/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t1 += xL2[(j+i+xL2_ELEM*3/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t2 += xL2[(j+i+xL2_ELEM*4/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t2 += xL2[(j+i+xL2_ELEM*5/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t2 += xL2[(j+i+xL2_ELEM*6/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ for (i = 0 ; i < FILTER_TAPS; i++)
+ t2 += xL2[(j+i+xL2_ELEM*7/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+ // Simulating normalization
+ t3 = ((t1*1000)/(42*137)) + 1;
+ t4 = ((t2*10)/(96*137)) + 1;
+ uint64_t t5 = (uint64_t)t3 * (uint64_t)t3;
+ uint64_t t6 = (uint64_t)t4 * (uint64_t)t4;
+ acc += (t5+t6)/((uint64_t)isqrt(t4+t3));
+ }
+ return acc;
+}
+
+
+static void calc_task_function(rtems_task_argument arg)
+{
+ test_context *ctx;
+ uint64_t acc;
+
+ ctx = (test_context *)arg;
+
+ uint8_t tile;
+ rtems_id local_id = TaskSelfId();
+ uint8_t task_idx = 255;
+
+ for (int i = 0; i < TASK_COUNT; i++){
+ if (ctx->task_id[i] == local_id){
+ task_idx = i;
+ break;
+ }
+ }
+
+ char ch = '0' + task_idx;
+
+ rtems_message_queue_config msg_config = {
+ .name = rtems_build_name('M', 'S', 'G', ch),
+ .maximum_pending_messages = MAX_PENDING_MESSAGES,
+ .maximum_message_size = MAX_MESSAGE_SIZE,
+ .storage_size = sizeof(msg_task_queue_storage[task_idx]),
+ .storage_area = &msg_task_queue_storage[task_idx],
+ .attributes = RTEMS_FIFO | RTEMS_GLOBAL};
+
+ ctx->message_queue[task_idx] = CreateMessageQueue(msg_config);
+
+ SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+ ReceiveMessage(ctx->message_queue[task_idx], &tile);
+
+ while (tile <= ctx->ntiles) {
+ acc = 0;
+ count_process[tile - 1]++;
+ acc = calc_filter_simulation_equation(tile-1, ctx->ntiles);
+
+ ObtainMutex(ctx->mutex_id);
+ ctx->accxL2 += acc;
+ ReleaseMutex(ctx->mutex_id);
+
+ SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+ ReceiveMessage(ctx->message_queue[task_idx], &tile);
+ }
+
+ SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+ SendEvents(ctx->main_task, event_send[task_idx]);
+ SuspendSelf();
+}
+
+static void Init(rtems_task_argument arg){
+ (void)arg;
+ test_context ctx;
+ uint32_t start_time, end_time, elapsed_time;
+ char ch, str[ITOA_STR_SIZE];
+ uint32_t total_events = 0;
+ uint8_t task = 255;
+ bool correctly_processed = true;
+ (void) memset(&ctx, 0, sizeof(test_context));
+ (void) memset(&count_process[0], 0, TOTAL_TILES);
+ (void) memset(&xL2[0], 0, xL2_ELEM);
+
+#ifdef GR740_ESA_BOARD
+ soc_stats_regs soc_stats;
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Create/Initialize Objects
+//-----------------------------------------------------------------------------
+ rtems_message_queue_config msg_config = {
+ .name = rtems_build_name('M', 'S', 'G', 'T'),
+ .maximum_pending_messages = RTEMS_ARRAY_SIZE(msg_tile_queue_storage),
+ .maximum_message_size = MAX_MESSAGE_SIZE,
+ .storage_size = sizeof(msg_tile_queue_storage),
+ .storage_area = &msg_tile_queue_storage,
+ .attributes = RTEMS_FIFO | RTEMS_GLOBAL};
+
+ ctx.main_task = rtems_task_self();
+ ctx.tile_queue = CreateMessageQueue(msg_config);
+ ctx.ntiles = TOTAL_TILES;
+ ctx.next_tile = 1;
+
+ rtems_task_config calc_task_config = {
+ .initial_priority = PRIO_NORMAL,
+ .storage_size = TASK_STORAGE_SIZE,
+ .maximum_thread_local_storage_size = MAX_TLS_SIZE,
+ .initial_modes = RTEMS_DEFAULT_MODES,
+ .attributes = TASK_ATTRIBUTES};
+
+ ctx.mutex_id = CreateMutex(rtems_build_name('M', 'U', 'T', 'X'));
+
+ for (uint32_t i = 0; i < TASK_COUNT; i++){
+ ch = '0' + i;
+ calc_task_config.name = rtems_build_name('R', 'U', 'N', ch);
+ calc_task_config.storage_area = &calc_task_storage[i][0];
+
+ ctx.task_id[i] = DoCreateTask(calc_task_config);
+ StartTask(ctx.task_id[i], calc_task_function, &ctx);
+ total_events += event_send[i];
+ }
+ SetSelfPriority( PRIO_NORMAL );
+
+
+//-----------------------------------------------------------------------------
+// Setup the testcase
+//-----------------------------------------------------------------------------
+ fill_main_memory_with_data();
+ l1_dcache_flush();
+ l1_dcache_disable();
+
+#ifdef GR740_ESA_BOARD
+ l2_cache_disable();
+ l2_cache_flush();
+ l2_cache_enable();
+#endif
+
+ l1_dcache_enable();
+ uint32_t control_data_word = warmup_caches();
+
+#ifdef GR740_ESA_BOARD
+ l2_cache_enable_split_responses();
+ clockgating_enable_l4stat();
+ soc_stats_configure_regs();
+ soc_stats_init(&soc_stats);
+#endif
+
+//-----------------------------------------------------------------------------
+// Do the work: distribute the work through the tasks
+//-----------------------------------------------------------------------------
+ start_time = rtems_clock_get_ticks_since_boot();
+ while (ReceiveAvailableEvents() != total_events) {
+ ReceiveMessage(ctx.tile_queue, &task);
+ SendMessage(ctx.message_queue[task], &ctx.next_tile, sizeof(ctx.next_tile));
+ ctx.next_tile++;
+ }
+ end_time = rtems_clock_get_ticks_since_boot();
+ elapsed_time = end_time - start_time;
+
+#ifdef GR740_ESA_BOARD
+ soc_stats_update(&soc_stats);
+#endif
+
+//-----------------------------------------------------------------------------
+// Show Results
+//-----------------------------------------------------------------------------
+ print_string("\n");
+ print_string("Multicore Elapsed Time -");
+ print_string(itoa(elapsed_time, &str[0], 10));
+ print_string("\n");
+
+ for (uint8_t i = 0; i < ctx.ntiles; i++){
+ if (count_process[i] != 1){
+ correctly_processed = false;
+ break;
+ }
+ }
+ if (correctly_processed){
+ print_string("Each tile only processed once : true\n");
+ }
+ else{
+ print_string("Each tile only processed once : false\n");
+ }
+
+ print_string("Input Data Result Value : 0x");
+ print_string(itoa(control_data_word , &str[0], 16));
+ print_string("\n");
+
+ print_string("Ouput Data Result Value : 0x");
+ if (ctx.accxL2>=UINT32_MAX) {
+ print_string(itoa( (int32_t) ((ctx.accxL2 >> 32U) & ((uint64_t)UINT32_MAX)) , &str[0], 16));
+ print_string(itoa( (int32_t) (ctx.accxL2 & ((uint64_t)UINT32_MAX)) , &str[0], 16));
+ }
+ else {
+ print_string(itoa((int32_t)ctx.accxL2 , &str[0], 16));
+ }
+ print_string("\n");
+
+
+#ifdef GR740_ESA_BOARD
+ print_string("L1 Instr Cache misses (read) CPU_0 : ");
+ print_string(itoa(soc_stats.l1_inst_cache_miss[0], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Instr Cache misses (read) CPU_1 : ");
+ print_string(itoa(soc_stats.l1_inst_cache_miss[1], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Instr Cache misses (read) CPU_2 : ");
+ print_string(itoa(soc_stats.l1_inst_cache_miss[2], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Instr Cache misses (read) CPU_3 : ");
+ print_string(itoa(soc_stats.l1_inst_cache_miss[3], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Data Cache misses (read) CPU_0 : ");
+ print_string(itoa(soc_stats.l1_data_cache_miss[0], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Data Cache misses (read) CPU_1 : ");
+ print_string(itoa(soc_stats.l1_data_cache_miss[1], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Data Cache misses (read) CPU_2 : ");
+ print_string(itoa(soc_stats.l1_data_cache_miss[2], &str[0], 10));
+ print_string("\n");
+ print_string("L1 Data Cache misses (read) CPU_3 : ");
+ print_string(itoa(soc_stats.l1_data_cache_miss[3], &str[0], 10));
+ print_string("\n");
+ print_string("L2 Cache hits (read + writes) : ");
+ print_string(itoa(soc_stats.l2_cache_hits, &str[0], 10));
+ print_string("\n");
+ print_string("L2 Cache misses (read + writes) : ");
+ print_string(itoa(soc_stats.l2_cache_miss, &str[0], 10));
+ print_string("\n");
+ print_string("AHB Splits : ");
+ print_string(itoa(soc_stats.ahb_split_delay, &str[0], 10));
+ print_string("\n");
+ print_string("\n");
+#endif
+
+// --------------------------------------------------------------------------
+// Delete Objects and Finalize testcase
+// --------------------------------------------------------------------------
+ for (uint32_t i = 0; i < TASK_COUNT; i++){
+ DeleteTask(ctx.task_id[i]);
+ DeleteMessageQueue(ctx.message_queue[i]);
+ }
+ DeleteMessageQueue(ctx.tile_queue);
+
+ DeleteMutex(ctx.mutex_id);
+
+ rtems_fatal(RTEMS_FATAL_SOURCE_EXIT, 0);
+}
+
+#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
+
+#define CONFIGURE_MAXIMUM_PROCESSORS TEST_PROCESSORS
+
+#define CONFIGURE_MAXIMUM_MESSAGE_QUEUES MAX_MESSAGE_QUEUES
+
+#define CONFIGURE_MAXIMUM_SEMAPHORES 1
+
+#define CONFIGURE_MAXIMUM_TASKS ( TEST_PROCESSORS + 1 )
+
+#define CONFIGURE_SCHEDULER_EDF_SMP
+
+#define CONFIGURE_MINIMUM_TASK_STACK_SIZE \
+ RTEMS_MINIMUM_STACK_SIZE + CPU_STACK_ALIGNMENT
+
+#define CONFIGURE_EXTRA_TASK_STACKS RTEMS_MINIMUM_STACK_SIZE
+
+#define CONFIGURE_INIT_TASK_CONSTRUCT_STORAGE_SIZE 2 * TASK_STORAGE_SIZE
+
+#define CONFIGURE_MINIMUM_TASKS_WITH_USER_PROVIDED_STORAGE \
+ CONFIGURE_MAXIMUM_TASKS
+
+#define CONFIGURE_MICROSECONDS_PER_TICK 1000
+
+#define CONFIGURE_MAXIMUM_FILE_DESCRIPTORS 0
+
+#define CONFIGURE_DISABLE_NEWLIB_REENTRANCY
+
+#define CONFIGURE_APPLICATION_DISABLE_FILESYSTEM
+
+#define CONFIGURE_MAXIMUM_THREAD_LOCAL_STORAGE_SIZE MAX_TLS_SIZE
+
+#define CONFIGURE_RTEMS_INIT_TASKS_TABLE
+
+#define CONFIGURE_INIT_TASK_ATTRIBUTES (RTEMS_SYSTEM_TASK | TASK_ATTRIBUTES)
+
+#define CONFIGURE_INIT_TASK_INITIAL_MODES RTEMS_DEFAULT_MODES
+
+#define CONFIGURE_INIT
+
+#include <rtems/confdefs.h>