From d907c2294a438f07099730b88ff88a659e940de7 Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Mon, 2 Nov 2020 13:02:20 +0100 Subject: powerpc: Add support for VRSAVE The VRSAVE feature of the Altivec unit can be used to reduce the amount of Altivec registers which need to be saved/restored during interrupt processing and context switches. In order to use the VRSAVE optimization a corresponding multilib (-mvrsave) is required, see GCC configuration. The -mvrsave option must be added to the ABI_FLAGS of the BSP. Currently only the -mcpu=e6500 based QorIQ BSP support this optimization. Update #4712. --- cpukit/score/cpu/powerpc/cpu.c | 5 +- cpukit/score/cpu/powerpc/include/rtems/score/cpu.h | 25 +++++-- cpukit/score/cpu/powerpc/ppc-context-validate.S | 77 ++++++++++++++++++++-- 3 files changed, 92 insertions(+), 15 deletions(-) (limited to 'cpukit/score') diff --git a/cpukit/score/cpu/powerpc/cpu.c b/cpukit/score/cpu/powerpc/cpu.c index 6147d7be74..bdb9cf6ab5 100644 --- a/cpukit/score/cpu/powerpc/cpu.c +++ b/cpukit/score/cpu/powerpc/cpu.c @@ -79,8 +79,10 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE); #endif #ifdef PPC_MULTILIB_ALTIVEC + PPC_ASSERT_OFFSET(vrsave, VRSAVE); + PPC_ASSERT_OFFSET(vscr, VSCR); RTEMS_STATIC_ASSERT( - PPC_CONTEXT_OFFSET_V20 % 16 == 0, + PPC_CONTEXT_OFFSET_V20 % PPC_DEFAULT_CACHE_LINE_SIZE == 0, ppc_context_altivec ); PPC_ASSERT_OFFSET(v20, V20); @@ -95,7 +97,6 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE); PPC_ASSERT_OFFSET(v29, V29); PPC_ASSERT_OFFSET(v30, V30); PPC_ASSERT_OFFSET(v31, V31); - PPC_ASSERT_OFFSET(vrsave, VRSAVE); #endif #ifdef PPC_MULTILIB_FPU diff --git a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h index 42900aeb1d..a9f0acac00 100644 --- a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h +++ b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h @@ -29,7 +29,7 @@ * * Copyright (c) 2001 Surrey Satellite Technology Limited (SSTL). * - * Copyright (c) 2010, 2017 embedded brains GmbH. + * Copyright (c) 2010, 2020 embedded brains GmbH. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -246,6 +246,13 @@ typedef struct { uint32_t isr_dispatch_disable; uint32_t reserved_for_alignment; #if defined(PPC_MULTILIB_ALTIVEC) + #if !defined(__powerpc64__) + uint32_t reserved_for_alignment_2[4]; + #endif + uint32_t vrsave; + uint32_t reserved_for_alignment_3[2]; + /* This field must take stvewx/lvewx requirements into account */ + uint32_t vscr; uint8_t v20[16]; uint8_t v21[16]; uint8_t v22[16]; @@ -258,7 +265,6 @@ typedef struct { uint8_t v29[16]; uint8_t v30[16]; uint8_t v31[16]; - uint32_t vrsave; #elif defined(__ALTIVEC__) /* * 12 non-volatile vector registers, cache-aligned area for vscr/vrsave @@ -373,8 +379,16 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #define PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE PPC_CONTEXT_GPR_OFFSET( 32 ) #ifdef PPC_MULTILIB_ALTIVEC + #ifdef __powerpc64__ + #define PPC_CONTEXT_OFFSET_VRSAVE \ + ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 ) + #else + #define PPC_CONTEXT_OFFSET_VRSAVE \ + ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 24 ) + #endif + #define PPC_CONTEXT_OFFSET_VSCR ( PPC_CONTEXT_OFFSET_VRSAVE + 12 ) #define PPC_CONTEXT_OFFSET_V( v ) \ - ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8) + ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_VRSAVE + 16) #define PPC_CONTEXT_OFFSET_V20 PPC_CONTEXT_OFFSET_V( 20 ) #define PPC_CONTEXT_OFFSET_V21 PPC_CONTEXT_OFFSET_V( 21 ) #define PPC_CONTEXT_OFFSET_V22 PPC_CONTEXT_OFFSET_V( 22 ) @@ -387,9 +401,8 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #define PPC_CONTEXT_OFFSET_V29 PPC_CONTEXT_OFFSET_V( 29 ) #define PPC_CONTEXT_OFFSET_V30 PPC_CONTEXT_OFFSET_V( 30 ) #define PPC_CONTEXT_OFFSET_V31 PPC_CONTEXT_OFFSET_V( 31 ) - #define PPC_CONTEXT_OFFSET_VRSAVE PPC_CONTEXT_OFFSET_V( 32 ) #define PPC_CONTEXT_OFFSET_F( f ) \ - ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_VRSAVE + 8 ) + ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_V( 32 ) ) #else #define PPC_CONTEXT_OFFSET_F( f ) \ ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 ) @@ -419,7 +432,7 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #if defined(PPC_MULTILIB_FPU) #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_F( 32 ) #elif defined(PPC_MULTILIB_ALTIVEC) - #define PPC_CONTEXT_VOLATILE_SIZE (PPC_CONTEXT_OFFSET_VRSAVE + 4) + #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_V( 33 ) #elif defined(__ALTIVEC__) #define PPC_CONTEXT_VOLATILE_SIZE \ (PPC_CONTEXT_GPR_OFFSET( 32 ) + 8 \ diff --git a/cpukit/score/cpu/powerpc/ppc-context-validate.S b/cpukit/score/cpu/powerpc/ppc-context-validate.S index e4331b2661..67cb5b45c3 100644 --- a/cpukit/score/cpu/powerpc/ppc-context-validate.S +++ b/cpukit/score/cpu/powerpc/ppc-context-validate.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-2-Clause */ /* - * Copyright (c) 2013, 2017 embedded brains GmbH. All rights reserved. + * Copyright (c) 2013, 2020 embedded brains GmbH. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -99,6 +99,7 @@ #define VTMP_OFFSET VOFFSET(12) #define VTMP2_OFFSET VOFFSET(13) #define VRSAVE_OFFSET VOFFSET(14) + #define VRSAVE2_OFFSET (VOFFSET(14) + 4) #define VSCR_OFFSET (VOFFSET(14) + 12) #define ALTIVECEND VOFFSET(15) #else @@ -161,6 +162,13 @@ _CPU_Context_validate: #endif #ifdef PPC_MULTILIB_ALTIVEC + mfvrsave r0 + stw r0, VRSAVE_OFFSET(r1) + li r0, 0xffffffff + mtvrsave r0 + mfvscr v0 + li r0, VSCR_OFFSET + stvewx v0, r1, r0 li r0, V20_OFFSET stvx v20, r1, r0 li r0, V21_OFFSET @@ -185,11 +193,6 @@ _CPU_Context_validate: stvx v30, r1, r0 li r0, V31_OFFSET stvx v31, r1, r0 - mfvscr v0 - li r0, VSCR_OFFSET - stvewx v0, r1, r0 - mfvrsave r0 - stw r0, VRSAVE_OFFSET(r1) #endif /* Fill */ @@ -337,8 +340,10 @@ _CPU_Context_validate: FILL_V 29 FILL_V 30 FILL_V 31 +#ifndef __PPC_VRSAVE__ addi r4, r3, 0x700 mtvrsave r4 +#endif #endif /* Check */ @@ -516,6 +521,15 @@ check: #ifdef PPC_MULTILIB_ALTIVEC .macro CHECK_V i +#ifdef __PPC_VRSAVE__ + mfvrsave r4 +.if (31 - \i) > 15 + andis. r5, r4, 1 << (31 - \i - 16) +.else + andi. r5, r4, 1 << (31 - \i) +.endif + beq 1f +#endif li r4, VTMP_OFFSET stvx \i, r1, r4 lwz r5, VTMP_OFFSET(r1) @@ -534,9 +548,43 @@ check: addi r4, r3, 0x600 + \i cmpw r5, r4 bne restore +#ifdef __PPC_VRSAVE__ + mfvrsave r4 +.if (31 - \i) > 15 + xoris r4, r4, 1 << (31 - \i - 16) +.else + xori r4, r4, 1 << (31 - \i) +.endif + mtvrsave r4 + b 2f +1: +.if (31 - \i) > 15 + oris r4, r4, 1 << (31 - \i - 16) +.else + ori r4, r4, 1 << (31 - \i) +.endif + mtvrsave r4 + addi r4, r3, 0x300 + \i + stw r4, VTMP_OFFSET(r1) + addi r4, r3, 0x400 + \i + stw r4, VTMP_OFFSET + 4(r1) + addi r4, r3, 0x500 + \i + stw r4, VTMP_OFFSET + 8(r1) + addi r4, r3, 0x600 + \i + stw r4, VTMP_OFFSET + 12(r1) + li r4, VTMP_OFFSET + lvx \i, r1, r4 +2: +#endif .endm /* Check VSCR */ +#ifdef __PPC_VRSAVE__ + mfvrsave r4 + stw r4, VRSAVE2_OFFSET(r1) + oris r4, r4, 0x8000 + mtvrsave r4 +#endif li r4, VTMP_OFFSET stvx v0, r1, r4 mfvscr v0 @@ -548,6 +596,10 @@ check: bne restore li r4, VTMP_OFFSET lvx v0, r1, r4 +#ifdef __PPC_VRSAVE__ + lwz r4, VRSAVE2_OFFSET(r1) + mtvrsave r4 +#endif CHECK_V 0 CHECK_V 1 @@ -582,9 +634,15 @@ check: CHECK_V 30 CHECK_V 31 mfvrsave r5 +#ifdef __PPC_VRSAVE__ + addi r5, r5, 1 + cmplwi r0, r5, 1 + bgt restore +#else addi r4, r3, 0x700 cmpw r5, r4 bne restore +#endif #endif mtcr r29 @@ -595,7 +653,7 @@ check: restore: #ifdef PPC_MULTILIB_ALTIVEC - lwz r0, VRSAVE_OFFSET(r1) + li r0, 0xffffffff mtvrsave r0 li r0, V31_OFFSET lvx v31, r1, r0 @@ -621,6 +679,11 @@ restore: lvx v21, r1, r0 li r0, V20_OFFSET lvx v20, r1, r0 + li r0, VSCR_OFFSET + lvewx v0, r1, r0 + mtvscr v0 + lwz r0, VRSAVE_OFFSET(r1) + mtvrsave r0 #endif #ifdef PPC_MULTILIB_FPU -- cgit v1.2.3