1 files changed, 495 insertions, 0 deletions
diff --git a/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c b/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c
new file mode 100644
index 0000000000..dcaa534c6c
--- /dev/null
+++ b/testsuites/isvv/23.6_multi_cache_split_transactions_enabled/multi_cache_split_transactions_enabled.c
@@ -0,0 +1,495 @@
+/* SPDX-License-Identifier: BSD-2-Clause */
+
+/*
+ * Copyright (C) 2022 Critical Software SA
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in the
+ *    documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
+ * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
+ * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
+ * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+ * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+ * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
+ * POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include <stddef.h>
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <rtems.h>
+#include <rtems/bspIo.h>
+#include <string.h>
+#include "../shared/utils.h"
+#include "../shared/isvv_rtems_aux.h"
+#include "../shared/low_level_utils.h"
+
+
+/**
+ *
+ * @brief Tests impact performing of L2 cache split transactions
+ *
+ * Step 2: Multiprocessor with L2 cache split transactions enabled
+ *
+ * There is one set of math equations processed:
+ *  - named "filter_simulation"
+ *
+ * The equations for "filter_simulation" are used by TEST_PROCESSORS tasks that
+ * processes small sections of a "data array" in parallel.
+ *
+ * The locations in memory for acessing the small sections of the "data arrays" do
+ * overlap in terms of cache memory positions, and when acessing one section of the
+ * "data array", or even a smaller part of one, other sections of that "data array"
+ * should be kicked out of the cache. Sometimes that may happen within the same task.
+ *
+ * With the "L2 cache split transactions" enabled, everytime a data word is required
+ * by the CPU from L2 and it may happen that the L2 cache is busy retrieving other other data
+ * from the main memory, and the data word would need to wait for the other transaction to
+ * finish if L2 cache split transactions enabled.
+ *
+ * With L2 cache split transactions enabled, L2 cache can issue a split transactation to
+ * the other data transaction, and becomes free to attend the data word that was required by CPU.
+ * For the other data transaction, the master of the bus that requested it must issue a Retry
+ * transaction.
+ *
+ * Expected Results:
+ *   - The Tiles must be processed only once.
+ *   - "Ouput Data Result Value" must match in the Uniprocessor version with the
+ *      Multiprocessor version with L2 cache split transactions disabled.
+ *   - Elapsed Time should be lower than the Uniprocessor version, and slightly lower
+ *      than Multiprocessor version with L2 cache split transactions disabled.
+ *   - The number of AHB Splits transactions reported must be significant
+ *
+ */
+
+/**
+ *
+ * For standalone tests in the actual hardware boards the following options can be used:
+ *
+ *  1) use make XFLAGS="-Dgr740 -DGR740_ESA_BOARD"
+ *  2) declare #define GR740_ESA_BOARD at the beginning of this file
+ *
+ */
+
+#define MAX_TLS_SIZE RTEMS_ALIGN_UP(64, RTEMS_TASK_STORAGE_ALIGNMENT)
+
+#define TASK_ATTRIBUTES RTEMS_FLOATING_POINT
+
+#define TASK_STORAGE_SIZE                      \
+  RTEMS_TASK_STORAGE_SIZE(                     \
+      MAX_TLS_SIZE + RTEMS_MINIMUM_STACK_SIZE, \
+      TASK_ATTRIBUTES)
+
+// test specific global vars
+#define TASK_COUNT TEST_PROCESSORS
+#define TOTAL_TILES 64
+
+#define MAX_MESSAGE_QUEUES 5
+#define MAX_MESSAGE_SIZE sizeof(uint8_t)
+#define MAX_PENDING_MESSAGES 10
+
+rtems_event_set event_send[4] = {RTEMS_EVENT_1,
+                                 RTEMS_EVENT_2,
+                                 RTEMS_EVENT_3,
+                                 RTEMS_EVENT_4};
+
+uint8_t count_process[TOTAL_TILES];
+
+typedef struct
+{
+  rtems_id main_task;
+  uint8_t ntiles;
+  uint8_t next_tile;
+  rtems_id task_id[TASK_COUNT];
+  rtems_id tile_queue;
+  rtems_id message_queue[TASK_COUNT];
+  rtems_id mutex_id;
+  uint64_t accxL2;
+} test_context;
+
+RTEMS_ALIGNED(RTEMS_TASK_STORAGE_ALIGNMENT)
+static char calc_task_storage[TASK_COUNT][TASK_STORAGE_SIZE];
+
+RTEMS_MESSAGE_QUEUE_BUFFER(MAX_MESSAGE_SIZE)
+msg_tile_queue_storage[MAX_PENDING_MESSAGES];
+
+RTEMS_MESSAGE_QUEUE_BUFFER(MAX_MESSAGE_SIZE)
+msg_task_queue_storage[TASK_COUNT][MAX_PENDING_MESSAGES];
+
+
+#define ITOA_STR_SIZE     (8*sizeof(int)+1)
+
+//-----------------------------------------------------------------------------------------
+#define L2_CACHE_SIZE     (4U*512U*1024U)                     // 2Mbytes
+#define L2_CACHE_WAY_SIZE (512U*1024U)                        // 512kbytes
+#define xL2_ELEM          (8*L2_CACHE_SIZE/sizeof(uint32_t))  // 4M elements
+
+#ifdef GR740_ESA_BOARD
+RTEMS_ALIGNED(L2_CACHE_SIZE)
+#endif
+static uint32_t xL2[xL2_ELEM];
+
+//-----------------------------------------------------------------------------------------
+#define FILTER_TAPS       (16U)
+#define COEFS_SIZE        (128U)
+const uint32_t coefs[COEFS_SIZE] =
+                            { 29,  31,  37,  41,  43,  47,  53,  59,
+                              61,  67,  71,  73,  79,  83,  89,  97,
+                             101, 103, 107, 109, 113, 127, 131, 137,
+                             139, 149, 151, 157, 163, 167, 173, 179,
+                             181, 191, 193, 197, 199, 211, 223, 227,
+                             229, 233, 239, 241, 251, 257, 263, 269,
+                             271, 277, 281, 283, 293, 307, 311, 313,
+                             317, 331, 337, 347, 349, 353, 359, 367,
+                             373, 379, 383, 389, 397, 401, 409, 419,
+                             421, 431, 433, 439, 443, 449, 457, 461,
+                             463, 467, 479, 487, 491, 499, 503, 509,
+                             521, 523, 541, 547, 557, 563, 569, 571,
+                             577, 587, 593, 599, 601, 607, 613, 617,
+                             619, 631, 641, 643, 647, 653, 659, 661,
+                             673, 677, 683, 691, 701, 709, 719, 727,
+                             733, 739, 743, 751, 757, 761, 769, 773};
+
+static void fill_main_memory_with_data(void){
+  // Store to memory
+  for ( uint32_t j = 0 ; j < xL2_ELEM; j ++)
+    xL2[j] = j;
+}
+
+static uint64_t warmup_caches(void){
+  uint64_t acc = 0;
+  for ( uint32_t j = 0 ; j < xL2_ELEM; j++)
+    acc += xL2[j];
+  return acc;
+}
+
+static uint64_t calc_filter_simulation_equation(uint8_t tile, uint32_t total_elems){
+  uint64_t acc = 0;
+  const uint32_t begin_idx = tile*xL2_ELEM/total_elems;
+  const uint32_t end_idx   = begin_idx + (xL2_ELEM/total_elems) - 1;
+
+  for ( uint32_t j = begin_idx ; j <= end_idx; j ++) {
+    uint32_t i;
+    
+    // Simulating filtering/convolution
+    uint32_t t1 = 0, t2 = 0, t3 = 0, t4 = 0;
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t1 += xL2[(j+i) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t1 += xL2[(j+i+xL2_ELEM*1/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t1 += xL2[(j+i+xL2_ELEM*2/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t1 += xL2[(j+i+xL2_ELEM*3/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t2 += xL2[(j+i+xL2_ELEM*4/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t2 += xL2[(j+i+xL2_ELEM*5/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t2 += xL2[(j+i+xL2_ELEM*6/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    for (i = 0 ; i < FILTER_TAPS; i++)
+      t2 += xL2[(j+i+xL2_ELEM*7/8/sizeof(uint32_t)) % xL2_ELEM]*coefs[i];
+
+    // Simulating normalization
+    t3 = ((t1*1000)/(42*137)) + 1;
+    t4 = ((t2*10)/(96*137)) + 1;
+    uint64_t t5 = (uint64_t)t3 * (uint64_t)t3;
+    uint64_t t6 = (uint64_t)t4 * (uint64_t)t4;
+    acc += (t5+t6)/((uint64_t)isqrt(t4+t3));
+  }
+  return acc;
+}
+
+
+static void calc_task_function(rtems_task_argument arg)
+{
+  test_context *ctx;
+  uint64_t acc;
+
+  ctx = (test_context *)arg;
+
+  uint8_t tile;
+  rtems_id local_id = TaskSelfId();
+  uint8_t task_idx = 255;
+
+  for (int i = 0; i < TASK_COUNT; i++){
+    if (ctx->task_id[i] == local_id){
+      task_idx = i;
+      break;
+    }
+  }
+
+  char ch = '0' + task_idx;
+
+  rtems_message_queue_config msg_config = {
+      .name = rtems_build_name('M', 'S', 'G', ch),
+      .maximum_pending_messages = MAX_PENDING_MESSAGES,
+      .maximum_message_size = MAX_MESSAGE_SIZE,
+      .storage_size = sizeof(msg_task_queue_storage[task_idx]),
+      .storage_area = &msg_task_queue_storage[task_idx],
+      .attributes = RTEMS_FIFO | RTEMS_GLOBAL};
+
+  ctx->message_queue[task_idx] = CreateMessageQueue(msg_config);
+
+  SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+  ReceiveMessage(ctx->message_queue[task_idx], &tile);
+
+  while (tile <= ctx->ntiles) {
+    acc = 0;
+    count_process[tile - 1]++;
+    acc = calc_filter_simulation_equation(tile-1, ctx->ntiles);
+
+    ObtainMutex(ctx->mutex_id);
+    ctx->accxL2 += acc;
+    ReleaseMutex(ctx->mutex_id);
+
+    SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+    ReceiveMessage(ctx->message_queue[task_idx], &tile);
+  }
+
+  SendMessage(ctx->tile_queue, &task_idx, sizeof(task_idx));
+  SendEvents(ctx->main_task, event_send[task_idx]);
+  SuspendSelf();
+}
+
+static void Init(rtems_task_argument arg){
+  (void)arg;
+  test_context ctx;
+  uint32_t start_time, end_time, elapsed_time;
+  char ch, str[ITOA_STR_SIZE];
+  uint32_t total_events = 0;
+  uint8_t task = 255;
+  bool correctly_processed = true;
+  (void) memset(&ctx, 0, sizeof(test_context));
+  (void) memset(&count_process[0], 0, TOTAL_TILES);
+  (void) memset(&xL2[0], 0, xL2_ELEM);
+  
+#ifdef GR740_ESA_BOARD 
+  soc_stats_regs soc_stats;
+#endif
+
+
+//-----------------------------------------------------------------------------
+// Create/Initialize Objects
+//-----------------------------------------------------------------------------
+  rtems_message_queue_config msg_config = {
+      .name = rtems_build_name('M', 'S', 'G', 'T'),
+      .maximum_pending_messages = RTEMS_ARRAY_SIZE(msg_tile_queue_storage),
+      .maximum_message_size = MAX_MESSAGE_SIZE,
+      .storage_size = sizeof(msg_tile_queue_storage),
+      .storage_area = &msg_tile_queue_storage,
+      .attributes = RTEMS_FIFO | RTEMS_GLOBAL};
+
+  ctx.main_task = rtems_task_self();
+  ctx.tile_queue = CreateMessageQueue(msg_config);
+  ctx.ntiles = TOTAL_TILES;
+  ctx.next_tile = 1;
+
+  rtems_task_config calc_task_config = {
+      .initial_priority = PRIO_NORMAL,
+      .storage_size = TASK_STORAGE_SIZE,
+      .maximum_thread_local_storage_size = MAX_TLS_SIZE,
+      .initial_modes = RTEMS_DEFAULT_MODES,
+      .attributes = TASK_ATTRIBUTES};
+
+  ctx.mutex_id = CreateMutex(rtems_build_name('M', 'U', 'T', 'X'));
+
+  for (uint32_t i = 0; i < TASK_COUNT; i++){
+    ch = '0' + i;
+    calc_task_config.name = rtems_build_name('R', 'U', 'N', ch);
+    calc_task_config.storage_area = &calc_task_storage[i][0];
+
+    ctx.task_id[i] = DoCreateTask(calc_task_config);
+    StartTask(ctx.task_id[i], calc_task_function, &ctx);
+    total_events += event_send[i];
+  }
+  SetSelfPriority( PRIO_NORMAL );
+
+
+//-----------------------------------------------------------------------------
+// Setup the testcase
+//-----------------------------------------------------------------------------
+  fill_main_memory_with_data();
+  l1_dcache_flush();
+  l1_dcache_disable();
+
+#ifdef GR740_ESA_BOARD
+  l2_cache_disable();
+  l2_cache_flush();
+  l2_cache_enable();
+#endif
+
+  l1_dcache_enable();
+  uint32_t control_data_word = warmup_caches();
+
+#ifdef GR740_ESA_BOARD
+  l2_cache_enable_split_responses();
+  clockgating_enable_l4stat();
+  soc_stats_configure_regs();
+  soc_stats_init(&soc_stats);
+#endif
+
+//-----------------------------------------------------------------------------
+// Do the work: distribute the work through the tasks
+//-----------------------------------------------------------------------------
+  start_time = rtems_clock_get_ticks_since_boot();
+  while (ReceiveAvailableEvents() != total_events) {
+    ReceiveMessage(ctx.tile_queue, &task);
+    SendMessage(ctx.message_queue[task], &ctx.next_tile, sizeof(ctx.next_tile));
+    ctx.next_tile++;
+  }
+  end_time = rtems_clock_get_ticks_since_boot();
+  elapsed_time = end_time - start_time;
+
+#ifdef GR740_ESA_BOARD
+  soc_stats_update(&soc_stats);
+#endif
+
+//-----------------------------------------------------------------------------
+// Show Results
+//-----------------------------------------------------------------------------
+  print_string("\n");
+  print_string("Multicore Elapsed Time -");
+  print_string(itoa(elapsed_time, &str[0], 10));
+  print_string("\n");
+
+  for (uint8_t i = 0; i < ctx.ntiles; i++){
+    if (count_process[i] != 1){
+      correctly_processed = false;
+      break;
+    }
+  }
+  if (correctly_processed){
+    print_string("Each tile only processed once             : true\n");
+  }
+  else{
+    print_string("Each tile only processed once             : false\n");
+  }
+
+  print_string("Input Data Result Value                   : 0x");
+  print_string(itoa(control_data_word , &str[0], 16));
+  print_string("\n");
+
+  print_string("Ouput Data Result Value                   : 0x");
+  if (ctx.accxL2>=UINT32_MAX) {
+    print_string(itoa( (int32_t) ((ctx.accxL2 >> 32U) & ((uint64_t)UINT32_MAX)) , &str[0], 16));
+    print_string(itoa( (int32_t) (ctx.accxL2          & ((uint64_t)UINT32_MAX)) , &str[0], 16));
+  }
+  else {
+    print_string(itoa((int32_t)ctx.accxL2  , &str[0], 16));
+  }
+  print_string("\n");
+
+
+#ifdef GR740_ESA_BOARD
+  print_string("L1 Instr Cache misses (read) CPU_0        : ");
+  print_string(itoa(soc_stats.l1_inst_cache_miss[0], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Instr Cache misses (read) CPU_1        : ");
+  print_string(itoa(soc_stats.l1_inst_cache_miss[1], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Instr Cache misses (read) CPU_2        : ");
+  print_string(itoa(soc_stats.l1_inst_cache_miss[2], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Instr Cache misses (read) CPU_3        : ");
+  print_string(itoa(soc_stats.l1_inst_cache_miss[3], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Data Cache misses (read) CPU_0         : ");
+  print_string(itoa(soc_stats.l1_data_cache_miss[0], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Data Cache misses (read) CPU_1         : ");
+  print_string(itoa(soc_stats.l1_data_cache_miss[1], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Data Cache misses (read) CPU_2         : ");
+  print_string(itoa(soc_stats.l1_data_cache_miss[2], &str[0], 10));
+  print_string("\n");
+  print_string("L1 Data Cache misses (read) CPU_3         : ");
+  print_string(itoa(soc_stats.l1_data_cache_miss[3], &str[0], 10));
+  print_string("\n");
+  print_string("L2 Cache hits  (read + writes)            : ");
+  print_string(itoa(soc_stats.l2_cache_hits, &str[0], 10));
+  print_string("\n");
+  print_string("L2 Cache misses (read + writes)           : ");
+  print_string(itoa(soc_stats.l2_cache_miss, &str[0], 10));
+  print_string("\n");
+  print_string("AHB Splits                                : ");
+  print_string(itoa(soc_stats.ahb_split_delay, &str[0], 10));
+  print_string("\n");
+  print_string("\n");
+#endif
+
+// --------------------------------------------------------------------------
+// Delete Objects and Finalize testcase
+// --------------------------------------------------------------------------
+  for (uint32_t i = 0; i < TASK_COUNT; i++){
+    DeleteTask(ctx.task_id[i]);
+    DeleteMessageQueue(ctx.message_queue[i]);
+  }
+  DeleteMessageQueue(ctx.tile_queue);
+
+  DeleteMutex(ctx.mutex_id);
+
+  rtems_fatal(RTEMS_FATAL_SOURCE_EXIT, 0);
+}
+
+#define CONFIGURE_APPLICATION_NEEDS_CLOCK_DRIVER
+
+#define CONFIGURE_MAXIMUM_PROCESSORS TEST_PROCESSORS
+
+#define CONFIGURE_MAXIMUM_MESSAGE_QUEUES MAX_MESSAGE_QUEUES
+
+#define CONFIGURE_MAXIMUM_SEMAPHORES 1
+
+#define CONFIGURE_MAXIMUM_TASKS ( TEST_PROCESSORS + 1 )
+
+#define CONFIGURE_SCHEDULER_EDF_SMP
+
+#define CONFIGURE_MINIMUM_TASK_STACK_SIZE \
+  RTEMS_MINIMUM_STACK_SIZE + CPU_STACK_ALIGNMENT
+
+#define CONFIGURE_EXTRA_TASK_STACKS RTEMS_MINIMUM_STACK_SIZE
+
+#define CONFIGURE_INIT_TASK_CONSTRUCT_STORAGE_SIZE 2 * TASK_STORAGE_SIZE
+
+#define CONFIGURE_MINIMUM_TASKS_WITH_USER_PROVIDED_STORAGE \
+  CONFIGURE_MAXIMUM_TASKS
+
+#define CONFIGURE_MICROSECONDS_PER_TICK 1000
+
+#define CONFIGURE_MAXIMUM_FILE_DESCRIPTORS 0
+
+#define CONFIGURE_DISABLE_NEWLIB_REENTRANCY
+
+#define CONFIGURE_APPLICATION_DISABLE_FILESYSTEM
+
+#define CONFIGURE_MAXIMUM_THREAD_LOCAL_STORAGE_SIZE MAX_TLS_SIZE
+
+#define CONFIGURE_RTEMS_INIT_TASKS_TABLE
+
+#define CONFIGURE_INIT_TASK_ATTRIBUTES (RTEMS_SYSTEM_TASK | TASK_ATTRIBUTES)
+
+#define CONFIGURE_INIT_TASK_INITIAL_MODES RTEMS_DEFAULT_MODES
+
+#define CONFIGURE_INIT
+
+#include <rtems/confdefs.h>