From d907c2294a438f07099730b88ff88a659e940de7 Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Mon, 2 Nov 2020 13:02:20 +0100 Subject: powerpc: Add support for VRSAVE The VRSAVE feature of the Altivec unit can be used to reduce the amount of Altivec registers which need to be saved/restored during interrupt processing and context switches. In order to use the VRSAVE optimization a corresponding multilib (-mvrsave) is required, see GCC configuration. The -mvrsave option must be added to the ABI_FLAGS of the BSP. Currently only the -mcpu=e6500 based QorIQ BSP support this optimization. Update #4712. --- bsps/powerpc/shared/cpu_asm.S | 156 ++++++++++++++++- .../shared/exceptions/ppc_exc_async_normal.S | 189 ++++++++++++++++++++- cpukit/score/cpu/powerpc/cpu.c | 5 +- cpukit/score/cpu/powerpc/include/rtems/score/cpu.h | 25 ++- cpukit/score/cpu/powerpc/ppc-context-validate.S | 77 ++++++++- 5 files changed, 429 insertions(+), 23 deletions(-) diff --git a/bsps/powerpc/shared/cpu_asm.S b/bsps/powerpc/shared/cpu_asm.S index 63f6a3fdfe..21860c8eea 100644 --- a/bsps/powerpc/shared/cpu_asm.S +++ b/bsps/powerpc/shared/cpu_asm.S @@ -23,7 +23,7 @@ * COPYRIGHT (c) 1989-1997. * On-Line Applications Research Corporation (OAR). * - * Copyright (c) 2011, 2017 embedded brains GmbH + * Copyright (c) 2011, 2020 embedded brains GmbH * * The license and distribution terms for this file may in * the file LICENSE in this distribution or at @@ -267,6 +267,10 @@ PROC (_CPU_Context_switch_no_return): isync #endif +#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__) + mfvrsave r9 +#endif + /* Align to a cache line */ CLEAR_RIGHT_IMMEDIATE r3, r3, PPC_DEFAULT_CACHE_LINE_POWER CLEAR_RIGHT_IMMEDIATE r5, r4, PPC_DEFAULT_CACHE_LINE_POWER @@ -284,6 +288,14 @@ PROC (_CPU_Context_switch_no_return): mfmsr r6 #endif /* END PPC_DISABLE_MSR_ACCESS */ mfcr r7 +#ifdef PPC_MULTILIB_ALTIVEC +#ifdef __PPC_VRSAVE__ + /* Mark v0 as used since we need it to get the VSCR */ + oris r8, r9, 0x8000 + mtvrsave r8 +#endif + mfvscr v0 +#endif mflr r8 lwz r11, PER_CPU_ISR_DISPATCH_DISABLE(r12) @@ -356,6 +368,16 @@ PROC (_CPU_Context_switch_no_return): stw r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r3) #ifdef PPC_MULTILIB_ALTIVEC + li r10, PPC_CONTEXT_OFFSET_VSCR + stvewx v0, r3, r10 + +#ifdef __PPC_VRSAVE__ + stw r9, PPC_CONTEXT_OFFSET_VRSAVE(r3) + andi. r9, r9, 0xfff + bne .Laltivec_save + +.Laltivec_save_continue: +#else /* __PPC_VRSAVE__ */ li r9, PPC_CONTEXT_OFFSET_V20 stvx v20, r3, r9 li r9, PPC_CONTEXT_OFFSET_V21 @@ -397,7 +419,8 @@ PROC (_CPU_Context_switch_no_return): stvx v31, r3, r9 mfvrsave r9 stw r9, PPC_CONTEXT_OFFSET_VRSAVE(r3) -#endif +#endif /* __PPC_VRSAVE__ */ +#endif /* PPC_MULTILIB_ALTIVEC */ #ifdef PPC_MULTILIB_FPU stfd f14, PPC_CONTEXT_OFFSET_F14(r3) @@ -461,6 +484,14 @@ restore_context: PPC_REG_LOAD r1, PPC_CONTEXT_OFFSET_GPR1(r5) PPC_REG_LOAD r8, PPC_CONTEXT_OFFSET_LR(r5) +#ifdef PPC_MULTILIB_ALTIVEC + li r10, PPC_CONTEXT_OFFSET_VSCR + lvewx v0, r5, r10 +#ifdef __PPC_VRSAVE__ + lwz r9, PPC_CONTEXT_OFFSET_VRSAVE(r5) +#endif +#endif + PPC_GPR_LOAD r14, PPC_CONTEXT_OFFSET_GPR14(r5) PPC_GPR_LOAD r15, PPC_CONTEXT_OFFSET_GPR15(r5) @@ -494,6 +525,15 @@ restore_context: lwz r11, PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE(r5) #ifdef PPC_MULTILIB_ALTIVEC + mtvscr v0 + +#ifdef __PPC_VRSAVE__ + mtvrsave r9 + andi. 
r9, r9, 0xfff + bne .Laltivec_restore + +.Laltivec_restore_continue: +#else /* __PPC_VRSAVE__ */ li r9, PPC_CONTEXT_OFFSET_V20 lvx v20, r5, r9 li r9, PPC_CONTEXT_OFFSET_V21 @@ -520,7 +560,8 @@ restore_context: lvx v31, r5, r9 lwz r9, PPC_CONTEXT_OFFSET_VRSAVE(r5) mtvrsave r9 -#endif +#endif /* __PPC_VRSAVE__ */ +#endif /* PPC_MULTILIB_ALTIVEC */ #ifdef PPC_MULTILIB_FPU lfd f14, PPC_CONTEXT_OFFSET_F14(r5) @@ -567,6 +608,13 @@ PROC (_CPU_Context_restore): li r3, 0 #endif +#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__) + /* Mark v0 as used since we need it to get the VSCR */ + mfvrsave r9 + oris r8, r9, 0x8000 + mtvrsave r8 +#endif + b restore_context #ifdef RTEMS_SMP @@ -595,3 +643,105 @@ PROC (_CPU_Context_restore): b .Lcheck_is_executing #endif + +#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__) +.Laltivec_save: + + /* + * Let X be VRSAVE, calculate: + * + * Z = X & 0x777 + * Z = Z + 0x777 + * X = X | Z + * + * Afterwards, we have in X for each group of four non-volatile VR + * registers: + * + * 0111b, if VRSAVE group of four registers == 0 + * 1XXXb, if VRSAVE group of four registers != 0 + */ + andi. r10, r9, 0x777 + addi r10, r10, 0x777 + or r9, r9, r10 + mtcr r9 + + bf 20, .Laltivec_save_v24 + li r9, PPC_CONTEXT_OFFSET_V20 + stvx v20, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V21 + stvx v21, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V22 + stvx v22, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V23 + stvx v23, r3, r9 + +.Laltivec_save_v24: + + bf 24, .Laltivec_save_v28 + li r9, PPC_CONTEXT_OFFSET_V24 + stvx v24, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V25 + stvx v25, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V26 + stvx v26, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V27 + stvx v27, r3, r9 + +.Laltivec_save_v28: + + bf 28, .Laltivec_save_continue + li r9, PPC_CONTEXT_OFFSET_V28 + stvx v28, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V29 + stvx v29, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V30 + stvx v30, r3, r9 + li r9, PPC_CONTEXT_OFFSET_V31 + stvx v31, r3, r9 + + b .Laltivec_save_continue + +.Laltivec_restore: + + /* See comment at .Laltivec_save */ + andi. r10, r9, 0x777 + addi r10, r10, 0x777 + or r9, r9, r10 + mtcr r9 + + bf 20, .Laltivec_restore_v24 + li r9, PPC_CONTEXT_OFFSET_V20 + lvx v20, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V21 + lvx v21, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V22 + lvx v22, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V23 + lvx v23, r5, r9 + +.Laltivec_restore_v24: + + bf 24, .Laltivec_restore_v28 + li r9, PPC_CONTEXT_OFFSET_V24 + lvx v24, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V25 + lvx v25, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V26 + lvx v26, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V27 + lvx v27, r5, r9 + +.Laltivec_restore_v28: + + bf 28, .Laltivec_restore_continue + li r9, PPC_CONTEXT_OFFSET_V28 + lvx v28, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V29 + lvx v29, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V30 + lvx v30, r5, r9 + li r9, PPC_CONTEXT_OFFSET_V31 + lvx v31, r5, r9 + + b .Laltivec_restore_continue +#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */ diff --git a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S index de4621ef55..f6318de256 100644 --- a/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S +++ b/bsps/powerpc/shared/exceptions/ppc_exc_async_normal.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-2-Clause */ /* - * Copyright (c) 2011, 2017 embedded brains GmbH. All rights reserved. + * Copyright (c) 2011, 2020 embedded brains GmbH. All rights reserved. 
* * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -173,8 +173,15 @@ ppc_exc_interrupt: evstdd SCRATCH_5_REGISTER, PPC_EXC_ACC_OFFSET(r1) #endif -#ifdef PPC_MULTILIB_ALTIVEC /* Save volatile AltiVec context */ +#ifdef PPC_MULTILIB_ALTIVEC +#ifdef __PPC_VRSAVE__ + mfvrsave SCRATCH_0_REGISTER + cmpwi SCRATCH_0_REGISTER, 0 + bne .Laltivec_save + +.Laltivec_save_continue: +#else /* __PPC_VRSAVE__ */ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0) stvx v0, r1, SCRATCH_0_REGISTER mfvscr v0 @@ -218,7 +225,8 @@ ppc_exc_interrupt: stvx v19, r1, SCRATCH_0_REGISTER li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET stvewx v0, r1, SCRATCH_0_REGISTER -#endif +#endif /* __PPC_VRSAVE__ */ +#endif /* PPC_MULTILIB_ALTIVEC */ #ifdef PPC_MULTILIB_FPU /* Save volatile FPU context */ @@ -334,8 +342,15 @@ ppc_exc_interrupt: .Lthread_dispatch_done: -#ifdef PPC_MULTILIB_ALTIVEC /* Restore volatile AltiVec context */ +#ifdef PPC_MULTILIB_ALTIVEC +#ifdef __PPC_VRSAVE__ + mfvrsave SCRATCH_0_REGISTER + cmpwi SCRATCH_0_REGISTER, 0 + bne .Laltivec_restore + +.Laltivec_restore_continue: +#else /* __PPC_VRSAVE__ */ li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET lvewx v0, r1, SCRATCH_0_REGISTER mtvscr v0 @@ -379,7 +394,8 @@ ppc_exc_interrupt: lvx v18, r1, SCRATCH_0_REGISTER li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19) lvx v19, r1, SCRATCH_0_REGISTER -#endif +#endif /* __PPC_VRSAVE__ */ +#endif /* PPC_MULTILIB_ALTIVEC */ #ifdef PPC_MULTILIB_FPU /* Restore volatile FPU context */ @@ -478,6 +494,169 @@ ppc_exc_interrupt: /* Return */ rfi +#if defined(PPC_MULTILIB_ALTIVEC) && defined(__PPC_VRSAVE__) +.Laltivec_save: + + /* + * Let X be VRSAVE, calculate: + * + * Y = 0x77777777 + * Z = X & Y + * Z = Z + Y + * X = X | Z + * + * Afterwards, we have in X for each group of four VR registers: + * + * 0111b, if VRSAVE group of four registers == 0 + * 1XXXb, if VRSAVE group of four registers != 0 + */ + lis SCRATCH_5_REGISTER, 0x7777 + ori SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777 + and SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER + add SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER + or SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER + mtcr SCRATCH_0_REGISTER + + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0) + stvx v0, r1, SCRATCH_0_REGISTER + + /* Move VCSR to V0 */ + mfvscr v0 + + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1) + stvx v1, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2) + stvx v2, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3) + stvx v3, r1, SCRATCH_0_REGISTER + + /* Save VCSR using V0 */ + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VSCR_OFFSET + stvewx v0, r1, SCRATCH_0_REGISTER + + bf 4, .Laltivec_save_v8 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4) + stvx v4, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5) + stvx v5, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6) + stvx v6, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7) + stvx v7, r1, SCRATCH_0_REGISTER + +.Laltivec_save_v8: + + bf 8, .Laltivec_save_v12 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8) + stvx v8, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9) + stvx v9, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10) + stvx v10, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11) + stvx v11, r1, SCRATCH_0_REGISTER + 
+.Laltivec_save_v12: + + bf 12, .Laltivec_save_v16 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12) + stvx v12, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13) + stvx v13, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14) + stvx v14, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15) + stvx v15, r1, SCRATCH_0_REGISTER + +.Laltivec_save_v16: + + bf 16, .Laltivec_save_continue + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16) + stvx v16, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17) + stvx v17, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18) + stvx v18, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19) + stvx v19, r1, SCRATCH_0_REGISTER + + b .Laltivec_save_continue + +.Laltivec_restore: + + /* Load VCSR using V0 */ + li SCRATCH_5_REGISTER, PPC_EXC_MIN_VSCR_OFFSET + lvewx v0, r1, SCRATCH_5_REGISTER + + /* See comment at .Laltivec_save */ + lis SCRATCH_5_REGISTER, 0x7777 + ori SCRATCH_5_REGISTER, SCRATCH_5_REGISTER, 0x7777 + and SCRATCH_6_REGISTER, SCRATCH_0_REGISTER, SCRATCH_5_REGISTER + add SCRATCH_6_REGISTER, SCRATCH_5_REGISTER, SCRATCH_6_REGISTER + or SCRATCH_0_REGISTER, SCRATCH_0_REGISTER, SCRATCH_6_REGISTER + mtcr SCRATCH_0_REGISTER + + /* Restore VCR using V0 */ + mtvscr v0 + + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(0) + lvx v0, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(1) + lvx v1, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(2) + lvx v2, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(3) + lvx v3, r1, SCRATCH_0_REGISTER + + bf 4, .Laltivec_restore_v8 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(4) + lvx v4, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(5) + lvx v5, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(6) + lvx v6, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(7) + lvx v7, r1, SCRATCH_0_REGISTER + +.Laltivec_restore_v8: + + bf 8, .Laltivec_restore_v12 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(8) + lvx v8, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(9) + lvx v9, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(10) + lvx v10, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(11) + lvx v11, r1, SCRATCH_0_REGISTER + +.Laltivec_restore_v12: + + bf 12, .Laltivec_restore_v16 + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(12) + lvx v12, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(13) + lvx v13, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(14) + lvx v14, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(15) + lvx v15, r1, SCRATCH_0_REGISTER + +.Laltivec_restore_v16: + + bf 16, .Laltivec_restore_continue + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(16) + lvx v16, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(17) + lvx v17, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(18) + lvx v18, r1, SCRATCH_0_REGISTER + li SCRATCH_0_REGISTER, PPC_EXC_MIN_VR_OFFSET(19) + lvx v19, r1, SCRATCH_0_REGISTER + + b .Laltivec_restore_continue +#endif /* PPC_MULTILIB_ALTIVEC && __PPC_VRSAVE__ */ + /* Symbol provided for debugging and tracing */ ppc_exc_interrupt_end: diff --git a/cpukit/score/cpu/powerpc/cpu.c b/cpukit/score/cpu/powerpc/cpu.c index 6147d7be74..bdb9cf6ab5 100644 --- a/cpukit/score/cpu/powerpc/cpu.c 
+++ b/cpukit/score/cpu/powerpc/cpu.c @@ -79,8 +79,10 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE); #endif #ifdef PPC_MULTILIB_ALTIVEC + PPC_ASSERT_OFFSET(vrsave, VRSAVE); + PPC_ASSERT_OFFSET(vscr, VSCR); RTEMS_STATIC_ASSERT( - PPC_CONTEXT_OFFSET_V20 % 16 == 0, + PPC_CONTEXT_OFFSET_V20 % PPC_DEFAULT_CACHE_LINE_SIZE == 0, ppc_context_altivec ); PPC_ASSERT_OFFSET(v20, V20); @@ -95,7 +97,6 @@ PPC_ASSERT_OFFSET(isr_dispatch_disable, ISR_DISPATCH_DISABLE); PPC_ASSERT_OFFSET(v29, V29); PPC_ASSERT_OFFSET(v30, V30); PPC_ASSERT_OFFSET(v31, V31); - PPC_ASSERT_OFFSET(vrsave, VRSAVE); #endif #ifdef PPC_MULTILIB_FPU diff --git a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h index 42900aeb1d..a9f0acac00 100644 --- a/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h +++ b/cpukit/score/cpu/powerpc/include/rtems/score/cpu.h @@ -29,7 +29,7 @@ * * Copyright (c) 2001 Surrey Satellite Technology Limited (SSTL). * - * Copyright (c) 2010, 2017 embedded brains GmbH. + * Copyright (c) 2010, 2020 embedded brains GmbH. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -246,6 +246,13 @@ typedef struct { uint32_t isr_dispatch_disable; uint32_t reserved_for_alignment; #if defined(PPC_MULTILIB_ALTIVEC) + #if !defined(__powerpc64__) + uint32_t reserved_for_alignment_2[4]; + #endif + uint32_t vrsave; + uint32_t reserved_for_alignment_3[2]; + /* This field must take stvewx/lvewx requirements into account */ + uint32_t vscr; uint8_t v20[16]; uint8_t v21[16]; uint8_t v22[16]; @@ -258,7 +265,6 @@ typedef struct { uint8_t v29[16]; uint8_t v30[16]; uint8_t v31[16]; - uint32_t vrsave; #elif defined(__ALTIVEC__) /* * 12 non-volatile vector registers, cache-aligned area for vscr/vrsave @@ -373,8 +379,16 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #define PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE PPC_CONTEXT_GPR_OFFSET( 32 ) #ifdef PPC_MULTILIB_ALTIVEC + #ifdef __powerpc64__ + #define PPC_CONTEXT_OFFSET_VRSAVE \ + ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 ) + #else + #define PPC_CONTEXT_OFFSET_VRSAVE \ + ( PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 24 ) + #endif + #define PPC_CONTEXT_OFFSET_VSCR ( PPC_CONTEXT_OFFSET_VRSAVE + 12 ) #define PPC_CONTEXT_OFFSET_V( v ) \ - ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8) + ( ( ( v ) - 20 ) * 16 + PPC_CONTEXT_OFFSET_VRSAVE + 16) #define PPC_CONTEXT_OFFSET_V20 PPC_CONTEXT_OFFSET_V( 20 ) #define PPC_CONTEXT_OFFSET_V21 PPC_CONTEXT_OFFSET_V( 21 ) #define PPC_CONTEXT_OFFSET_V22 PPC_CONTEXT_OFFSET_V( 22 ) @@ -387,9 +401,8 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #define PPC_CONTEXT_OFFSET_V29 PPC_CONTEXT_OFFSET_V( 29 ) #define PPC_CONTEXT_OFFSET_V30 PPC_CONTEXT_OFFSET_V( 30 ) #define PPC_CONTEXT_OFFSET_V31 PPC_CONTEXT_OFFSET_V( 31 ) - #define PPC_CONTEXT_OFFSET_VRSAVE PPC_CONTEXT_OFFSET_V( 32 ) #define PPC_CONTEXT_OFFSET_F( f ) \ - ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_VRSAVE + 8 ) + ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_V( 32 ) ) #else #define PPC_CONTEXT_OFFSET_F( f ) \ ( ( ( f ) - 14 ) * 8 + PPC_CONTEXT_OFFSET_ISR_DISPATCH_DISABLE + 8 ) @@ -419,7 +432,7 @@ static inline ppc_context *ppc_get_context( const Context_Control *context ) #if defined(PPC_MULTILIB_FPU) #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_F( 32 ) #elif defined(PPC_MULTILIB_ALTIVEC) - #define PPC_CONTEXT_VOLATILE_SIZE (PPC_CONTEXT_OFFSET_VRSAVE + 
4) + #define PPC_CONTEXT_VOLATILE_SIZE PPC_CONTEXT_OFFSET_V( 33 ) #elif defined(__ALTIVEC__) #define PPC_CONTEXT_VOLATILE_SIZE \ (PPC_CONTEXT_GPR_OFFSET( 32 ) + 8 \ diff --git a/cpukit/score/cpu/powerpc/ppc-context-validate.S b/cpukit/score/cpu/powerpc/ppc-context-validate.S index e4331b2661..67cb5b45c3 100644 --- a/cpukit/score/cpu/powerpc/ppc-context-validate.S +++ b/cpukit/score/cpu/powerpc/ppc-context-validate.S @@ -1,7 +1,7 @@ /* SPDX-License-Identifier: BSD-2-Clause */ /* - * Copyright (c) 2013, 2017 embedded brains GmbH. All rights reserved. + * Copyright (c) 2013, 2020 embedded brains GmbH. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions @@ -99,6 +99,7 @@ #define VTMP_OFFSET VOFFSET(12) #define VTMP2_OFFSET VOFFSET(13) #define VRSAVE_OFFSET VOFFSET(14) + #define VRSAVE2_OFFSET (VOFFSET(14) + 4) #define VSCR_OFFSET (VOFFSET(14) + 12) #define ALTIVECEND VOFFSET(15) #else @@ -161,6 +162,13 @@ _CPU_Context_validate: #endif #ifdef PPC_MULTILIB_ALTIVEC + mfvrsave r0 + stw r0, VRSAVE_OFFSET(r1) + li r0, 0xffffffff + mtvrsave r0 + mfvscr v0 + li r0, VSCR_OFFSET + stvewx v0, r1, r0 li r0, V20_OFFSET stvx v20, r1, r0 li r0, V21_OFFSET @@ -185,11 +193,6 @@ _CPU_Context_validate: stvx v30, r1, r0 li r0, V31_OFFSET stvx v31, r1, r0 - mfvscr v0 - li r0, VSCR_OFFSET - stvewx v0, r1, r0 - mfvrsave r0 - stw r0, VRSAVE_OFFSET(r1) #endif /* Fill */ @@ -337,8 +340,10 @@ _CPU_Context_validate: FILL_V 29 FILL_V 30 FILL_V 31 +#ifndef __PPC_VRSAVE__ addi r4, r3, 0x700 mtvrsave r4 +#endif #endif /* Check */ @@ -516,6 +521,15 @@ check: #ifdef PPC_MULTILIB_ALTIVEC .macro CHECK_V i +#ifdef __PPC_VRSAVE__ + mfvrsave r4 +.if (31 - \i) > 15 + andis. r5, r4, 1 << (31 - \i - 16) +.else + andi. r5, r4, 1 << (31 - \i) +.endif + beq 1f +#endif li r4, VTMP_OFFSET stvx \i, r1, r4 lwz r5, VTMP_OFFSET(r1) @@ -534,9 +548,43 @@ check: addi r4, r3, 0x600 + \i cmpw r5, r4 bne restore +#ifdef __PPC_VRSAVE__ + mfvrsave r4 +.if (31 - \i) > 15 + xoris r4, r4, 1 << (31 - \i - 16) +.else + xori r4, r4, 1 << (31 - \i) +.endif + mtvrsave r4 + b 2f +1: +.if (31 - \i) > 15 + oris r4, r4, 1 << (31 - \i - 16) +.else + ori r4, r4, 1 << (31 - \i) +.endif + mtvrsave r4 + addi r4, r3, 0x300 + \i + stw r4, VTMP_OFFSET(r1) + addi r4, r3, 0x400 + \i + stw r4, VTMP_OFFSET + 4(r1) + addi r4, r3, 0x500 + \i + stw r4, VTMP_OFFSET + 8(r1) + addi r4, r3, 0x600 + \i + stw r4, VTMP_OFFSET + 12(r1) + li r4, VTMP_OFFSET + lvx \i, r1, r4 +2: +#endif .endm /* Check VSCR */ +#ifdef __PPC_VRSAVE__ + mfvrsave r4 + stw r4, VRSAVE2_OFFSET(r1) + oris r4, r4, 0x8000 + mtvrsave r4 +#endif li r4, VTMP_OFFSET stvx v0, r1, r4 mfvscr v0 @@ -548,6 +596,10 @@ check: bne restore li r4, VTMP_OFFSET lvx v0, r1, r4 +#ifdef __PPC_VRSAVE__ + lwz r4, VRSAVE2_OFFSET(r1) + mtvrsave r4 +#endif CHECK_V 0 CHECK_V 1 @@ -582,9 +634,15 @@ check: CHECK_V 30 CHECK_V 31 mfvrsave r5 +#ifdef __PPC_VRSAVE__ + addi r5, r5, 1 + cmplwi r0, r5, 1 + bgt restore +#else addi r4, r3, 0x700 cmpw r5, r4 bne restore +#endif #endif mtcr r29 @@ -595,7 +653,7 @@ check: restore: #ifdef PPC_MULTILIB_ALTIVEC - lwz r0, VRSAVE_OFFSET(r1) + li r0, 0xffffffff mtvrsave r0 li r0, V31_OFFSET lvx v31, r1, r0 @@ -621,6 +679,11 @@ restore: lvx v21, r1, r0 li r0, V20_OFFSET lvx v20, r1, r0 + li r0, VSCR_OFFSET + lvewx v0, r1, r0 + mtvscr v0 + lwz r0, VRSAVE_OFFSET(r1) + mtvrsave r0 #endif #ifdef PPC_MULTILIB_FPU -- cgit v1.2.3
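
As a supplement to the comments at .Laltivec_save above, the following self-contained C sketch (illustrative only, not part of the patch; the helper names ppc_bit and vrsave_group_mask are invented for this example) reproduces the Z = X & 0x777, Z = Z + 0x777, X = X | Z computation and shows which groups of four non-volatile vector registers the context-switch code would save for a given VRSAVE value:

#include <stdint.h>
#include <stdio.h>

/*
 * VRSAVE uses PowerPC MSB-0 bit numbering: bit 0 corresponds to v0,
 * bit 31 to v31.  Map a PowerPC bit number to a uint32_t mask.
 */
static uint32_t ppc_bit(unsigned bit)
{
  return UINT32_C(1) << (31u - bit);
}

/*
 * Replicate the sequence used for the non-volatile registers v20..v31:
 *
 *   Z = X & 0x777;  Z = Z + 0x777;  X = X | Z;
 *
 * Afterwards the bit of the first register in each group of four
 * (v20, v24, v28) is set if and only if any register of that group is
 * marked as used in VRSAVE.
 */
static uint32_t vrsave_group_mask(uint32_t vrsave)
{
  uint32_t z = vrsave & 0x777u;

  /* Carry into the group's leading bit if any of the other three bits is set */
  z += 0x777u;

  /* Keep the bits which were already set in VRSAVE */
  return vrsave | z;
}

int main(void)
{
  /* Example: only v22 and v30 are marked as used */
  uint32_t vrsave = ppc_bit(22) | ppc_bit(30);
  uint32_t cr = vrsave_group_mask(vrsave);

  /* The assembly moves this value to CR and tests bits 20, 24, 28 with "bf" */
  printf("save v20-v23: %s\n", (cr & ppc_bit(20)) != 0 ? "yes" : "no");
  printf("save v24-v27: %s\n", (cr & ppc_bit(24)) != 0 ? "yes" : "no");
  printf("save v28-v31: %s\n", (cr & ppc_bit(28)) != 0 ? "yes" : "no");
  return 0;
}

The interrupt prologue in ppc_exc_async_normal.S applies the same idea with the 0x77777777 mask to the volatile registers, where condition register bits 4, 8, 12 and 16 then select the groups starting at v4, v8, v12 and v16 (v0-v3 are always saved once VRSAVE is known to be non-zero).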