From 258ad71e9626c16f30b40e06c321326636c976ff Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Fri, 25 Sep 2015 14:34:24 +0200 Subject: SMP: Fix and optimize thread dispatching According to the C11 and C++11 memory models only a read-modify-write operation guarantees that we read the last value written in modification order. Avoid the sequential consistent thread fence and instead use the inter-processor interrupt to set the thread dispatch necessary indicator. --- c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c | 3 +- c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c | 5 +++ c/src/lib/libbsp/sparc/shared/irq_asm.S | 20 ++++------ c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S | 25 ++++++------ .../powerpc/shared/include/powerpc-utility.h | 9 ++++- cpukit/score/cpu/arm/cpu_asm.S | 27 ++++++------- cpukit/score/cpu/no_cpu/rtems/score/cpu.h | 39 ++++++++++++------- cpukit/score/include/rtems/score/percpu.h | 19 ++++----- cpukit/score/include/rtems/score/smpimpl.h | 6 +++ cpukit/score/include/rtems/score/threadimpl.h | 45 +++++++--------------- testsuites/smptests/smpthreadlife01/init.c | 3 +- 11 files changed, 101 insertions(+), 100 deletions(-) diff --git a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c index f2c0201c2c..7e939ff162 100644 --- a/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c +++ b/c/src/lib/libbsp/arm/shared/arm-a9mpcore-smp.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2013 embedded brains GmbH. All rights reserved. + * Copyright (c) 2013-2015 embedded brains GmbH. All rights reserved. * * embedded brains GmbH * Dornierstr. 4 @@ -62,6 +62,7 @@ void _CPU_SMP_Prepare_start_multitasking( void ) void _CPU_SMP_Send_interrupt( uint32_t target_processor_index ) { + _ARM_Data_memory_barrier(); arm_gic_irq_generate_software_irq( ARM_GIC_IRQ_SGI_0, ARM_GIC_IRQ_SOFTWARE_IRQ_TO_ALL_IN_LIST, diff --git a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c index 0b0743b35e..8952d3e809 100644 --- a/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c +++ b/c/src/lib/libbsp/powerpc/qoriq/startup/bspsmp.c @@ -235,5 +235,10 @@ void _CPU_SMP_Prepare_start_multitasking(void) void _CPU_SMP_Send_interrupt(uint32_t target_processor_index) { +#ifdef __PPC_CPU_E6500__ + ppc_light_weight_synchronize(); +#else + ppc_synchronize_data(); +#endif qoriq.pic.ipidr [IPI_INDEX].reg = 1U << target_processor_index; } diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S index f7222e7623..9d8600e0fe 100644 --- a/c/src/lib/libbsp/sparc/shared/irq_asm.S +++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S @@ -236,24 +236,18 @@ check_is_executing: beq try_update_is_executing mov 1, %g1 - ! Check if a thread dispatch is necessary - ldub [%g6 + PER_CPU_DISPATCH_NEEDED], %g1 - cmp %g1, 0 - beq check_is_executing - nop - - ! We have a new heir - - ! Clear the thread dispatch necessary flag - stub %g0, [%g6 + PER_CPU_DISPATCH_NEEDED] - - ! Here we assume a strong memory order, otherwise a memory barrier must - ! be inserted here + ! We may have a new heir ! Read the executing and heir ld [%g6 + PER_CPU_OFFSET_EXECUTING], %g1 ld [%g6 + PER_CPU_OFFSET_HEIR], %g2 + ! Update the executing only if necessary to avoid cache line + ! monopolization. + cmp %g1, %g2 + beq try_update_is_executing + mov 1, %g1 + ! Calculate the heir context pointer sub %o1, %g1, %g1 add %g1, %g2, %o1 diff --git a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S index 5d8c70d290..8bfef20043 100644 --- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S +++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S @@ -413,12 +413,12 @@ check_is_executing: addi r6, r5, PPC_CONTEXT_OFFSET_IS_EXECUTING lwarx r7, r0, r6 cmpwi r7, 0 - bne check_thread_dispatch_necessary + bne get_potential_new_heir /* Try to update the is executing indicator of the heir context */ li r7, 1 stwcx. r7, r0, r6 - bne check_thread_dispatch_necessary + bne get_potential_new_heir isync #endif @@ -536,26 +536,23 @@ PROC (_CPU_Context_restore): b restore_context #ifdef RTEMS_SMP -check_thread_dispatch_necessary: +get_potential_new_heir: GET_SELF_CPU_CONTROL r6 - /* Check if a thread dispatch is necessary */ - lbz r7, PER_CPU_DISPATCH_NEEDED(r6) - cmpwi r7, 0 - beq check_is_executing - - /* We have a new heir */ - - /* Clear the thread dispatch necessary flag */ - li r7, 0 - stb r7, PER_CPU_DISPATCH_NEEDED(r6) - msync + /* We may have a new heir */ /* Read the executing and heir */ lwz r7, PER_CPU_OFFSET_EXECUTING(r6) lwz r8, PER_CPU_OFFSET_HEIR(r6) + /* + * Update the executing only if necessary to avoid cache line + * monopolization. + */ + cmpw r7, r8 + beq check_is_executing + /* Calculate the heir context pointer */ sub r7, r4, r7 add r4, r8, r7 diff --git a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h index 331aa58057..b8dc5f40ce 100644 --- a/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h +++ b/c/src/lib/libcpu/powerpc/shared/include/powerpc-utility.h @@ -8,7 +8,7 @@ */ /* - * Copyright (c) 2008-2014 embedded brains GmbH. + * Copyright (c) 2008-2015 embedded brains GmbH. * * embedded brains GmbH * Dornierstr. 4 @@ -206,6 +206,13 @@ static inline void ppc_synchronize_data(void) __asm__ volatile ("sync"); } +static inline void ppc_light_weight_synchronize(void) +{ + RTEMS_COMPILER_MEMORY_BARRIER(); + + __asm__ volatile ("lwsync"); +} + static inline void ppc_synchronize_instructions(void) { RTEMS_COMPILER_MEMORY_BARRIER(); diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S index 344512b395..6ee9e7418f 100644 --- a/cpukit/score/cpu/arm/cpu_asm.S +++ b/cpukit/score/cpu/arm/cpu_asm.S @@ -19,7 +19,7 @@ * COPYRIGHT (c) 2000 Canon Research Centre France SA. * Emmanuel Raguet, mailto:raguet@crf.canon.fr * - * Copyright (c) 2013-2014 embedded brains GmbH + * Copyright (c) 2013-2015 embedded brains GmbH * * The license and distribution terms for this file may be * found in the file LICENSE in this distribution or at @@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch) add r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET ldrexb r4, [r3] cmp r4, #0 - bne .L_check_thread_dispatch_necessary + bne .L_get_potential_new_heir /* Try to update the is executing indicator of the heir context */ mov r4, #1 strexb r5, r4, [r3] cmp r5, #0 - bne .L_check_thread_dispatch_necessary + bne .L_get_potential_new_heir dmb #endif @@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore) b .L_restore #ifdef RTEMS_SMP -.L_check_thread_dispatch_necessary: +.L_get_potential_new_heir: GET_SELF_CPU_CONTROL r2, r3 - /* Check if a thread dispatch is necessary */ - ldrb r4, [r2, #PER_CPU_DISPATCH_NEEDED] - cmp r4, #0 - beq .L_check_is_executing - - /* We have a new heir */ - - /* Clear the thread dispatch necessary flag */ - mov r4, #0 - strb r4, [r2, #PER_CPU_DISPATCH_NEEDED] - dmb + /* We may have a new heir */ /* Read the executing and heir */ ldr r4, [r2, #PER_CPU_OFFSET_EXECUTING] ldr r5, [r2, #PER_CPU_OFFSET_HEIR] + /* + * Update the executing only if necessary to avoid cache line + * monopolization. + */ + cmp r4, r5 + beq .L_check_is_executing + /* Calculate the heir context pointer */ sub r4, r1, r4 add r1, r5, r4 diff --git a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h index f556087f25..37557096bc 100644 --- a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h +++ b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h @@ -559,34 +559,41 @@ typedef struct { * * This field must be updated during a context switch. The context switch * to the heir must wait until the heir context indicates that it is no - * longer executing on a processor. The context switch must also check if - * a thread dispatch is necessary to honor updates of the heir thread for - * this processor. This indicator must be updated using an atomic test and - * set operation to ensure that at most one processor uses the heir - * context at the same time. + * longer executing on a processor. This indicator must be updated using + * an atomic test and set operation to ensure that at most one processor + * uses the heir context at the same time. The context switch must also + * check for a potential new heir thread for this processor in case the + * heir context is not immediately available. Update the executing thread + * for this processor only if necessary to avoid a cache line + * monopolization. * * @code * void _CPU_Context_switch( - * Context_Control *executing, - * Context_Control *heir + * Context_Control *executing_context, + * Context_Control *heir_context * ) * { - * save( executing ); + * save( executing_context ); * - * executing->is_executing = false; + * executing_context->is_executing = false; * memory_barrier(); * - * if ( test_and_set( &heir->is_executing ) ) { + * if ( test_and_set( &heir_context->is_executing ) ) { * do { * Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot(); + * Thread_Control *executing = cpu_self->executing; + * Thread_Control *heir = cpu_self->heir; * - * if ( cpu_self->dispatch_necessary ) { - * heir = _Thread_Get_heir_and_make_it_executing( cpu_self ); + * if ( heir != executing ) { + * cpu_self->executing = heir; + * heir_context = (Context_Control *) + * ((uintptr_t) heir + (uintptr_t) executing_context + * - (uintptr_t) executing) * } - * } while ( test_and_set( &heir->is_executing ) ); + * } while ( test_and_set( &heir_context->is_executing ) ); * } * - * restore( heir ); + * restore( heir_context ); * } * @endcode */ @@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" ); * @brief Sends an inter-processor interrupt to the specified target * processor. * + * This interrupt send and the corresponding inter-processor interrupt must + * act as an release/acquire barrier so that all values written by the + * sending processor are visible to the target processor. + * * This operation is undefined for target processor indices out of range. * * @param[in] target_processor_index The target processor index. diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h index f1dad90ccc..806c290b7c 100644 --- a/cpukit/score/include/rtems/score/percpu.h +++ b/cpukit/score/include/rtems/score/percpu.h @@ -279,9 +279,11 @@ typedef struct Per_CPU_Control { * @brief This is the heir thread for this processor. * * This field is not protected by a lock. The only writer after multitasking - * start is the scheduler owning this processor. This processor will set the - * dispatch necessary indicator to false, before it reads the heir. This - * field is used in combination with the dispatch necessary indicator. + * start is the scheduler owning this processor. It is assumed that stores + * to pointers are atomic on all supported SMP architectures. The CPU port + * specific code (inter-processor interrupt handling and + * _CPU_SMP_Send_interrupt()) must guarantee that this processor observes the + * last value written. * * A thread can be a heir on at most one processor in the system. * @@ -290,16 +292,15 @@ typedef struct Per_CPU_Control { struct _Thread_Control *heir; /** - * @brief This is set to true when this processor needs to run the + * @brief This is set to true when this processor needs to run the thread * dispatcher. * * It is volatile since interrupts may alter this flag. * - * This field is not protected by a lock. There are two writers after - * multitasking start. The scheduler owning this processor sets this - * indicator to true, after it updated the heir field. This processor sets - * this indicator to false, before it reads the heir. This field is used in - * combination with the heir field. + * This field is not protected by a lock and must be accessed only by this + * processor. Code (e.g. scheduler and post-switch action requests) running + * on another processors must use an inter-processor interrupt to set the + * thread dispatch necessary indicator to true. * * @see _Thread_Get_heir_and_make_it_executing(). */ diff --git a/cpukit/score/include/rtems/score/smpimpl.h b/cpukit/score/include/rtems/score/smpimpl.h index 97c78b02b0..3167e82a82 100644 --- a/cpukit/score/include/rtems/score/smpimpl.h +++ b/cpukit/score/include/rtems/score/smpimpl.h @@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void ) { Per_CPU_Control *cpu_self = _Per_CPU_Get(); + /* + * In the common case the inter-processor interrupt is issued to carry out a + * thread dispatch. + */ + cpu_self->dispatch_necessary = true; + if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) { unsigned long message = _Atomic_Exchange_ulong( &cpu_self->message, diff --git a/cpukit/score/include/rtems/score/threadimpl.h b/cpukit/score/include/rtems/score/threadimpl.h index 68c26c388f..7412bd9633 100644 --- a/cpukit/score/include/rtems/score/threadimpl.h +++ b/cpukit/score/include/rtems/score/threadimpl.h @@ -11,7 +11,7 @@ * COPYRIGHT (c) 1989-2008. * On-Line Applications Research Corporation (OAR). * - * Copyright (c) 2014 embedded brains GmbH. + * Copyright (c) 2014-2015 embedded brains GmbH. * * The license and distribution terms for this file may be * found in the file LICENSE in this distribution or at @@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void ) /** * @brief Gets the heir of the processor and makes it executing. * - * The thread dispatch necessary indicator is cleared as a side-effect. + * Must be called with interrupts disabled. The thread dispatch necessary + * indicator is cleared as a side-effect. * * @return The heir thread. * @@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing( { Thread_Control *heir; - cpu_self->dispatch_necessary = false; - -#if defined( RTEMS_SMP ) - /* - * It is critical that we first update the dispatch necessary and then the - * read the heir so that we don't miss an update by - * _Thread_Dispatch_update_heir(). - */ - _Atomic_Fence( ATOMIC_ORDER_SEQ_CST ); -#endif - heir = cpu_self->heir; + cpu_self->dispatch_necessary = false; cpu_self->executing = heir; return heir; @@ -832,23 +823,10 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir( { cpu_for_heir->heir = heir; - /* - * It is critical that we first update the heir and then the dispatch - * necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an - * update. - */ - _Atomic_Fence( ATOMIC_ORDER_SEQ_CST ); - - /* - * Only update the dispatch necessary indicator if not already set to - * avoid superfluous inter-processor interrupts. - */ - if ( !cpu_for_heir->dispatch_necessary ) { - cpu_for_heir->dispatch_necessary = true; - - if ( cpu_for_heir != cpu_self ) { - _Per_CPU_Send_interrupt( cpu_for_heir ); - } + if ( cpu_for_heir == cpu_self ) { + cpu_self->dispatch_necessary = true; + } else { + _Per_CPU_Send_interrupt( cpu_for_heir ); } } #endif @@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action( ISR_Level level; cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level ); - cpu_of_thread->dispatch_necessary = true; #if defined(RTEMS_SMP) - if ( _Per_CPU_Get() != cpu_of_thread ) { + if ( _Per_CPU_Get() == cpu_of_thread ) { + cpu_of_thread->dispatch_necessary = true; + } else { _Per_CPU_Send_interrupt( cpu_of_thread ); } +#else + cpu_of_thread->dispatch_necessary = true; #endif _Chain_Append_if_is_off_chain_unprotected( diff --git a/testsuites/smptests/smpthreadlife01/init.c b/testsuites/smptests/smpthreadlife01/init.c index 12b6bd9f44..4597520141 100644 --- a/testsuites/smptests/smpthreadlife01/init.c +++ b/testsuites/smptests/smpthreadlife01/init.c @@ -201,7 +201,6 @@ static void delay_ipi_task(rtems_task_argument variant) ISR_Level level; _ISR_Disable_without_giant(level); - (void) level; /* (C) */ barrier(ctx, &ctx->worker_barrier_state); @@ -216,6 +215,8 @@ static void delay_ipi_task(rtems_task_argument variant) _Thread_Disable_dispatch(); } + _ISR_Enable_without_giant(level); + /* * We get deleted as a side effect of enabling the thread life protection or * later if we enable the thread dispatching. -- cgit v1.2.3