From c236082873cb4a2fd42af4ca0868106e1dd65422 Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Tue, 30 Jul 2013 15:54:53 +0200 Subject: smp: Provide cache optimized Per_CPU_Control Delete _Per_CPU_Information_p. --- c/src/lib/libbsp/i386/shared/irq/irq_asm.S | 5 ++-- c/src/lib/libbsp/sparc/shared/irq_asm.S | 23 ++++++++------- cpukit/sapi/include/confdefs.h | 3 +- cpukit/score/include/rtems/score/percpu.h | 46 +++++++++++++++++++++--------- cpukit/score/src/percpu.c | 5 +--- cpukit/score/src/percpuasm.c | 9 +++++- 6 files changed, 57 insertions(+), 34 deletions(-) diff --git a/c/src/lib/libbsp/i386/shared/irq/irq_asm.S b/c/src/lib/libbsp/i386/shared/irq/irq_asm.S index 2b16234622..bbc1afb5d6 100644 --- a/c/src/lib/libbsp/i386/shared/irq/irq_asm.S +++ b/c/src/lib/libbsp/i386/shared/irq/irq_asm.S @@ -99,9 +99,10 @@ SYM (_ISR_Handler): .check_stack_switch: movl esp, ebp /* ebp = previous stack pointer */ #if defined(RTEMS_SMP) && defined(BSP_HAS_SMP) - movl $SYM(_Per_CPU_Information_p), ebx call SYM(_CPU_SMP_Get_current_processor) - mov (ebx,eax,4), ebx + sall $PER_CPU_CONTROL_SIZE_LOG2, eax + addl $SYM(_Per_CPU_Information), eax + movl eax, ebx pushl ecx call SYM(_ISR_SMP_Enter) popl ecx diff --git a/c/src/lib/libbsp/sparc/shared/irq_asm.S b/c/src/lib/libbsp/sparc/shared/irq_asm.S index 5fb6a67682..eb2f9c593d 100644 --- a/c/src/lib/libbsp/sparc/shared/irq_asm.S +++ b/c/src/lib/libbsp/sparc/shared/irq_asm.S @@ -255,16 +255,18 @@ dont_fix_pil2: SYM(_ISR_PER_CPU): #if defined(RTEMS_SMP) - sethi %hi(_Per_CPU_Information_p), %l5 - add %l5, %lo(_Per_CPU_Information_p), %l5 + sethi %hi(_Per_CPU_Information), %l5 + add %l5, %lo(_Per_CPU_Information), %l5 #if BSP_LEON3_SMP /* LEON3 SMP support */ rd %asr17, %l7 srl %l7, 28, %l7 /* CPU number is upper 4 bits so shift */ - sll %l7, 2, %l7 /* l7 = offset */ - add %l5, %l7, %l5 + #else + mov 0, %l7 + nop #endif - ld [%l5], %l5 /* l5 = pointer to per CPU */ + sll %l7, PER_CPU_CONTROL_SIZE_LOG2, %l7 /* l7 = offset */ + add %l5, %l7, %l5 /* l5 = pointer to per CPU */ /* * On multi-core system, we need to use SMP safe versions @@ -456,19 +458,18 @@ isr_dispatch: */ #if defined(RTEMS_SMP) - sethi %hi(_Per_CPU_Information_p), %l5 - ld [%l5 + %lo(_Per_CPU_Information_p)], %l5 + sethi %hi(_Per_CPU_Information), %l5 + add %l5, %lo(_Per_CPU_Information), %l5 #if BSP_LEON3_SMP /* LEON3 SMP support */ rd %asr17, %l7 srl %l7, 28, %l7 /* CPU number is upper 4 bits so shift */ - sll %l7, 2, %l7 /* l7 = offset */ - add %l5, %l7, %l5 #else - nop + mov 0, %l7 nop #endif - ld [%l5], %l5 /* l5 = pointer to per CPU */ + sll %l7, PER_CPU_CONTROL_SIZE_LOG2, %l7 /* l7 = offset */ + add %l5, %l7, %l5 /* l5 = pointer to per CPU */ #else sethi %hi(_Per_CPU_Information), %l5 add %l5, %lo(_Per_CPU_Information), %l5 diff --git a/cpukit/sapi/include/confdefs.h b/cpukit/sapi/include/confdefs.h index c969fa78de..ed1385a3c9 100644 --- a/cpukit/sapi/include/confdefs.h +++ b/cpukit/sapi/include/confdefs.h @@ -2396,8 +2396,7 @@ const rtems_libio_helper rtems_fs_init_helper = * Instantiate the Per CPU information based upon the user configuration. */ #if defined(CONFIGURE_INIT) - Per_CPU_Control _Per_CPU_Information[CONFIGURE_SMP_MAXIMUM_PROCESSORS]; - Per_CPU_Control *_Per_CPU_Information_p[CONFIGURE_SMP_MAXIMUM_PROCESSORS]; + Per_CPU_Control_envelope _Per_CPU_Information[CONFIGURE_SMP_MAXIMUM_PROCESSORS]; #endif #endif diff --git a/cpukit/score/include/rtems/score/percpu.h b/cpukit/score/include/rtems/score/percpu.h index 11ba5f6681..14eed5a57f 100644 --- a/cpukit/score/include/rtems/score/percpu.h +++ b/cpukit/score/include/rtems/score/percpu.h @@ -33,6 +33,18 @@ extern "C" { #endif +#if defined( RTEMS_SMP ) + /* + * This ensures that on SMP configurations the individual per-CPU controls + * are on different cache lines to prevent false sharing. This define can be + * used in assembler code to easily get the per-CPU control for a particular + * processor. + */ + #define PER_CPU_CONTROL_SIZE_LOG2 7 + + #define PER_CPU_CONTROL_SIZE ( 1 << PER_CPU_CONTROL_SIZE_LOG2 ) +#endif + #if !defined( ASM ) #ifndef __THREAD_CONTROL_DEFINED__ @@ -184,32 +196,47 @@ typedef struct { #endif } Per_CPU_Control; +#if defined( RTEMS_SMP ) +typedef struct { + Per_CPU_Control per_cpu; + char unused_space_for_cache_line_alignment + [ PER_CPU_CONTROL_SIZE - sizeof( Per_CPU_Control ) ]; +} Per_CPU_Control_envelope; +#else +typedef struct { + Per_CPU_Control per_cpu; +} Per_CPU_Control_envelope; +#endif + /** * @brief Set of Per CPU Core Information * * This is an array of per CPU core information. */ -extern Per_CPU_Control _Per_CPU_Information[] CPU_STRUCTURE_ALIGNMENT; +extern Per_CPU_Control_envelope _Per_CPU_Information[] CPU_STRUCTURE_ALIGNMENT; #if defined( RTEMS_SMP ) static inline Per_CPU_Control *_Per_CPU_Get( void ) { _Assert_Thread_dispatching_repressed(); - return &_Per_CPU_Information[ _SMP_Get_current_processor() ]; + return &_Per_CPU_Information[ _SMP_Get_current_processor() ].per_cpu; } #else -#define _Per_CPU_Get() ( &_Per_CPU_Information[ 0 ] ) +#define _Per_CPU_Get() ( &_Per_CPU_Information[ 0 ].per_cpu ) #endif static inline Per_CPU_Control *_Per_CPU_Get_by_index( uint32_t index ) { - return &_Per_CPU_Information[ index ]; + return &_Per_CPU_Information[ index ].per_cpu; } static inline uint32_t _Per_CPU_Get_index( const Per_CPU_Control *per_cpu ) { - return ( uint32_t ) ( per_cpu - &_Per_CPU_Information[ 0 ] ); + const Per_CPU_Control_envelope *per_cpu_envelope = + ( const Per_CPU_Control_envelope * ) per_cpu; + + return ( uint32_t ) ( per_cpu_envelope - &_Per_CPU_Information[ 0 ] ); } #if defined( RTEMS_SMP ) @@ -219,15 +246,6 @@ static inline void _Per_CPU_Send_interrupt( const Per_CPU_Control *per_cpu ) _CPU_SMP_Send_interrupt( _Per_CPU_Get_index( per_cpu ) ); } -/** - * @brief Set of Pointers to Per CPU Core Information - * - * This is an array of pointers to each CPU's per CPU data structure. - * It should be simpler to retrieve this pointer in assembly language - * that to calculate the array offset. - */ -extern Per_CPU_Control *_Per_CPU_Information_p[]; - /** * @brief Initialize SMP Handler * diff --git a/cpukit/score/src/percpu.c b/cpukit/score/src/percpu.c index 5e1a917e1b..b041b45197 100644 --- a/cpukit/score/src/percpu.c +++ b/cpukit/score/src/percpu.c @@ -39,13 +39,10 @@ /* * Initialize per cpu pointer table */ - _Per_CPU_Information_p[0] = _Per_CPU_Get_by_index( 0 ); for ( cpu = 1 ; cpu < max_cpus; ++cpu ) { Per_CPU_Control *p = _Per_CPU_Get_by_index( cpu ); - _Per_CPU_Information_p[cpu] = p; - #if CPU_ALLOCATE_INTERRUPT_STACK == TRUE { size_t size = rtems_configuration_get_interrupt_stack_size(); @@ -101,5 +98,5 @@ * statically allocated per cpu structure. And the fields are initialized * as individual elements just like it has always been done. */ - Per_CPU_Control _Per_CPU_Information[1]; + Per_CPU_Control_envelope _Per_CPU_Information[1]; #endif diff --git a/cpukit/score/src/percpuasm.c b/cpukit/score/src/percpuasm.c index a79936bab4..60ca48dc5a 100644 --- a/cpukit/score/src/percpuasm.c +++ b/cpukit/score/src/percpuasm.c @@ -26,13 +26,20 @@ RTEMS_STATIC_ASSERT( CPU_SIZEOF_POINTER ); -#ifdef __SIZEOF_POINTER__ +#if defined( __SIZEOF_POINTER__ ) RTEMS_STATIC_ASSERT( CPU_SIZEOF_POINTER == __SIZEOF_POINTER__, __SIZEOF_POINTER__ ); #endif +#if defined( RTEMS_SMP ) + RTEMS_STATIC_ASSERT( + sizeof( Per_CPU_Control_envelope ) == PER_CPU_CONTROL_SIZE, + PER_CPU_CONTROL_SIZE + ); +#endif + RTEMS_STATIC_ASSERT( offsetof(Per_CPU_Control, isr_nest_level) == PER_CPU_ISR_NEST_LEVEL, PER_CPU_ISR_NEST_LEVEL -- cgit v1.2.3