diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2015-09-25 14:34:24 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2015-09-28 13:56:57 +0200 |
commit | 258ad71e9626c16f30b40e06c321326636c976ff (patch) | |
tree | da6e210947d590159796434bf04cf364247ac20a /cpukit | |
parent | SMP: Simplify thread lock operations (diff) | |
download | rtems-258ad71e9626c16f30b40e06c321326636c976ff.tar.bz2 |
SMP: Fix and optimize thread dispatching
According to the C11 and C++11 memory models only a read-modify-write
operation guarantees that we read the last value written in modification
order. Avoid the sequential consistent thread fence and instead use the
inter-processor interrupt to set the thread dispatch necessary
indicator.
Diffstat (limited to '')
-rw-r--r-- | cpukit/score/cpu/arm/cpu_asm.S | 27 | ||||
-rw-r--r-- | cpukit/score/cpu/no_cpu/rtems/score/cpu.h | 39 | ||||
-rw-r--r-- | cpukit/score/include/rtems/score/percpu.h | 19 | ||||
-rw-r--r-- | cpukit/score/include/rtems/score/smpimpl.h | 6 | ||||
-rw-r--r-- | cpukit/score/include/rtems/score/threadimpl.h | 45 |
5 files changed, 66 insertions, 70 deletions
diff --git a/cpukit/score/cpu/arm/cpu_asm.S b/cpukit/score/cpu/arm/cpu_asm.S index 344512b395..6ee9e7418f 100644 --- a/cpukit/score/cpu/arm/cpu_asm.S +++ b/cpukit/score/cpu/arm/cpu_asm.S @@ -19,7 +19,7 @@ * COPYRIGHT (c) 2000 Canon Research Centre France SA. * Emmanuel Raguet, mailto:raguet@crf.canon.fr * - * Copyright (c) 2013-2014 embedded brains GmbH + * Copyright (c) 2013-2015 embedded brains GmbH * * The license and distribution terms for this file may be * found in the file LICENSE in this distribution or at @@ -80,13 +80,13 @@ DEFINE_FUNCTION_ARM(_CPU_Context_switch) add r3, r1, #ARM_CONTEXT_CONTROL_IS_EXECUTING_OFFSET ldrexb r4, [r3] cmp r4, #0 - bne .L_check_thread_dispatch_necessary + bne .L_get_potential_new_heir /* Try to update the is executing indicator of the heir context */ mov r4, #1 strexb r5, r4, [r3] cmp r5, #0 - bne .L_check_thread_dispatch_necessary + bne .L_get_potential_new_heir dmb #endif @@ -126,26 +126,23 @@ DEFINE_FUNCTION_ARM(_CPU_Context_restore) b .L_restore #ifdef RTEMS_SMP -.L_check_thread_dispatch_necessary: +.L_get_potential_new_heir: GET_SELF_CPU_CONTROL r2, r3 - /* Check if a thread dispatch is necessary */ - ldrb r4, [r2, #PER_CPU_DISPATCH_NEEDED] - cmp r4, #0 - beq .L_check_is_executing - - /* We have a new heir */ - - /* Clear the thread dispatch necessary flag */ - mov r4, #0 - strb r4, [r2, #PER_CPU_DISPATCH_NEEDED] - dmb + /* We may have a new heir */ /* Read the executing and heir */ ldr r4, [r2, #PER_CPU_OFFSET_EXECUTING] ldr r5, [r2, #PER_CPU_OFFSET_HEIR] + /* + * Update the executing only if necessary to avoid cache line + * monopolization. + */ + cmp r4, r5 + beq .L_check_is_executing + /* Calculate the heir context pointer */ sub r4, r1, r4 add r1, r5, r4 diff --git a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h index f556087f25..37557096bc 100644 --- a/cpukit/score/cpu/no_cpu/rtems/score/cpu.h +++ b/cpukit/score/cpu/no_cpu/rtems/score/cpu.h @@ -559,34 +559,41 @@ typedef struct { * * This field must be updated during a context switch. The context switch * to the heir must wait until the heir context indicates that it is no - * longer executing on a processor. The context switch must also check if - * a thread dispatch is necessary to honor updates of the heir thread for - * this processor. This indicator must be updated using an atomic test and - * set operation to ensure that at most one processor uses the heir - * context at the same time. + * longer executing on a processor. This indicator must be updated using + * an atomic test and set operation to ensure that at most one processor + * uses the heir context at the same time. The context switch must also + * check for a potential new heir thread for this processor in case the + * heir context is not immediately available. Update the executing thread + * for this processor only if necessary to avoid a cache line + * monopolization. * * @code * void _CPU_Context_switch( - * Context_Control *executing, - * Context_Control *heir + * Context_Control *executing_context, + * Context_Control *heir_context * ) * { - * save( executing ); + * save( executing_context ); * - * executing->is_executing = false; + * executing_context->is_executing = false; * memory_barrier(); * - * if ( test_and_set( &heir->is_executing ) ) { + * if ( test_and_set( &heir_context->is_executing ) ) { * do { * Per_CPU_Control *cpu_self = _Per_CPU_Get_snapshot(); + * Thread_Control *executing = cpu_self->executing; + * Thread_Control *heir = cpu_self->heir; * - * if ( cpu_self->dispatch_necessary ) { - * heir = _Thread_Get_heir_and_make_it_executing( cpu_self ); + * if ( heir != executing ) { + * cpu_self->executing = heir; + * heir_context = (Context_Control *) + * ((uintptr_t) heir + (uintptr_t) executing_context + * - (uintptr_t) executing) * } - * } while ( test_and_set( &heir->is_executing ) ); + * } while ( test_and_set( &heir_context->is_executing ) ); * } * - * restore( heir ); + * restore( heir_context ); * } * @endcode */ @@ -1578,6 +1585,10 @@ register struct Per_CPU_Control *_CPU_Per_CPU_current asm( "rX" ); * @brief Sends an inter-processor interrupt to the specified target * processor. * + * This interrupt send and the corresponding inter-processor interrupt must + * act as an release/acquire barrier so that all values written by the + * sending processor are visible to the target processor. + * * This operation is undefined for target processor indices out of range. * * @param[in] target_processor_index The target processor index. diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h index f1dad90ccc..806c290b7c 100644 --- a/cpukit/score/include/rtems/score/percpu.h +++ b/cpukit/score/include/rtems/score/percpu.h @@ -279,9 +279,11 @@ typedef struct Per_CPU_Control { * @brief This is the heir thread for this processor. * * This field is not protected by a lock. The only writer after multitasking - * start is the scheduler owning this processor. This processor will set the - * dispatch necessary indicator to false, before it reads the heir. This - * field is used in combination with the dispatch necessary indicator. + * start is the scheduler owning this processor. It is assumed that stores + * to pointers are atomic on all supported SMP architectures. The CPU port + * specific code (inter-processor interrupt handling and + * _CPU_SMP_Send_interrupt()) must guarantee that this processor observes the + * last value written. * * A thread can be a heir on at most one processor in the system. * @@ -290,16 +292,15 @@ typedef struct Per_CPU_Control { struct _Thread_Control *heir; /** - * @brief This is set to true when this processor needs to run the + * @brief This is set to true when this processor needs to run the thread * dispatcher. * * It is volatile since interrupts may alter this flag. * - * This field is not protected by a lock. There are two writers after - * multitasking start. The scheduler owning this processor sets this - * indicator to true, after it updated the heir field. This processor sets - * this indicator to false, before it reads the heir. This field is used in - * combination with the heir field. + * This field is not protected by a lock and must be accessed only by this + * processor. Code (e.g. scheduler and post-switch action requests) running + * on another processors must use an inter-processor interrupt to set the + * thread dispatch necessary indicator to true. * * @see _Thread_Get_heir_and_make_it_executing(). */ diff --git a/cpukit/score/include/rtems/score/smpimpl.h b/cpukit/score/include/rtems/score/smpimpl.h index 97c78b02b0..3167e82a82 100644 --- a/cpukit/score/include/rtems/score/smpimpl.h +++ b/cpukit/score/include/rtems/score/smpimpl.h @@ -146,6 +146,12 @@ static inline void _SMP_Inter_processor_interrupt_handler( void ) { Per_CPU_Control *cpu_self = _Per_CPU_Get(); + /* + * In the common case the inter-processor interrupt is issued to carry out a + * thread dispatch. + */ + cpu_self->dispatch_necessary = true; + if ( _Atomic_Load_ulong( &cpu_self->message, ATOMIC_ORDER_RELAXED ) != 0 ) { unsigned long message = _Atomic_Exchange_ulong( &cpu_self->message, diff --git a/cpukit/score/include/rtems/score/threadimpl.h b/cpukit/score/include/rtems/score/threadimpl.h index 68c26c388f..7412bd9633 100644 --- a/cpukit/score/include/rtems/score/threadimpl.h +++ b/cpukit/score/include/rtems/score/threadimpl.h @@ -11,7 +11,7 @@ * COPYRIGHT (c) 1989-2008. * On-Line Applications Research Corporation (OAR). * - * Copyright (c) 2014 embedded brains GmbH. + * Copyright (c) 2014-2015 embedded brains GmbH. * * The license and distribution terms for this file may be * found in the file LICENSE in this distribution or at @@ -793,7 +793,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Internal_allocate( void ) /** * @brief Gets the heir of the processor and makes it executing. * - * The thread dispatch necessary indicator is cleared as a side-effect. + * Must be called with interrupts disabled. The thread dispatch necessary + * indicator is cleared as a side-effect. * * @return The heir thread. * @@ -806,18 +807,8 @@ RTEMS_INLINE_ROUTINE Thread_Control *_Thread_Get_heir_and_make_it_executing( { Thread_Control *heir; - cpu_self->dispatch_necessary = false; - -#if defined( RTEMS_SMP ) - /* - * It is critical that we first update the dispatch necessary and then the - * read the heir so that we don't miss an update by - * _Thread_Dispatch_update_heir(). - */ - _Atomic_Fence( ATOMIC_ORDER_SEQ_CST ); -#endif - heir = cpu_self->heir; + cpu_self->dispatch_necessary = false; cpu_self->executing = heir; return heir; @@ -832,23 +823,10 @@ RTEMS_INLINE_ROUTINE void _Thread_Dispatch_update_heir( { cpu_for_heir->heir = heir; - /* - * It is critical that we first update the heir and then the dispatch - * necessary so that _Thread_Get_heir_and_make_it_executing() cannot miss an - * update. - */ - _Atomic_Fence( ATOMIC_ORDER_SEQ_CST ); - - /* - * Only update the dispatch necessary indicator if not already set to - * avoid superfluous inter-processor interrupts. - */ - if ( !cpu_for_heir->dispatch_necessary ) { - cpu_for_heir->dispatch_necessary = true; - - if ( cpu_for_heir != cpu_self ) { - _Per_CPU_Send_interrupt( cpu_for_heir ); - } + if ( cpu_for_heir == cpu_self ) { + cpu_self->dispatch_necessary = true; + } else { + _Per_CPU_Send_interrupt( cpu_for_heir ); } } #endif @@ -930,12 +908,15 @@ RTEMS_INLINE_ROUTINE void _Thread_Add_post_switch_action( ISR_Level level; cpu_of_thread = _Thread_Action_ISR_disable_and_acquire( thread, &level ); - cpu_of_thread->dispatch_necessary = true; #if defined(RTEMS_SMP) - if ( _Per_CPU_Get() != cpu_of_thread ) { + if ( _Per_CPU_Get() == cpu_of_thread ) { + cpu_of_thread->dispatch_necessary = true; + } else { _Per_CPU_Send_interrupt( cpu_of_thread ); } +#else + cpu_of_thread->dispatch_necessary = true; #endif _Chain_Append_if_is_off_chain_unprotected( |