From a11e1ff57688a5cee5b0f807b970fe9eec05ac62 Mon Sep 17 00:00:00 2001
From: Sebastian Huber
Date: Tue, 7 Mar 2017 07:58:11 +0100
Subject: powerpc: Optimize AltiVec context switch

Use r8 instead of r5 to slightly optimize _CPU_Context_switch().  It is
not a big deal, however, we already assume r12 is used by
_CPU_Context_switch().  Treat r5 in the same way.
---
 .../libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S    | 59 +++++++++++-----------
 c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S  |  2 -
 2 files changed, 30 insertions(+), 31 deletions(-)

diff --git a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
index d836162d18..279d1704a7 100644
--- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
+++ b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
@@ -73,9 +73,10 @@
 	.set	r0,	0
 	.set	r3,	3
 	.set	r4,	4
-	.set	r5,	5
+	/* Do not use r5, since this is used by _CPU_Context_switch() */
 	.set	r6,	6
 	.set	r7,	7
+	.set	r8,	8
 	.set	r9,	9
 	.set	r10,	10
 	.set	r11,	11
@@ -578,12 +579,12 @@ _CPU_save_altivec_volatile:
 	mfcr	r9
 #endif
 
-	PREP_FOR_SAVE r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV19 _B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV19 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE r0, v0, r3, r11
@@ -613,10 +614,10 @@ _CPU_load_altivec_volatile:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE r3, r0, v0
-	CMP_BASES r3, r4, r5, r6, r10
+	CMP_BASES r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV19 r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV19 r3, r4, r8, r6, r10, r11
 
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -627,9 +628,9 @@ _CPU_load_altivec_volatile:
 _CPU_Context_switch_altivec:
 
 	/* fetch offset of altivec area in context */
-	CMPOFF	r5
+	CMPOFF	r8
 	/* down-align 'to' area to cache-line boundary */
-	add	r4, r4, r5
+	add	r4, r4, r8
 	CACHE_DOWNALGN r4
 
 	/* Check for PSIM */
@@ -658,21 +659,21 @@ _CPU_Context_switch_altivec:
 
 	/* SAVE NON-VOLATILE REGISTERS */
 
-	/* Compute aligned destination pointer (r5 still holds offset
+	/* Compute aligned destination pointer (r8 still holds offset
 	 * to 'altivec' area in context)
 	 */
-	add	r3, r3, r5
+	add	r3, r3, r8
 	CACHE_DOWNALGN r3
 
-	PREP_FOR_SAVE r0, r3, r5, r6, r7, r10
+	PREP_FOR_SAVE r0, r3, r8, r6, r7, r10
 	/* The manual says reading vscr can take some time - do
 	 * read it here (into a volatile vector register) while
 	 * we wait for cache blocks to be allocated
 	 */
 	mfvscr	v0
-	S_V20TOV31 _LRU=l, _B0=r3, _B1=r5, _B2=r6, _B3=r7, _O1=r10, _O2=r11
+	S_V20TOV31 _LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11
 	/* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */
-	S_VSCR_VRSAVE r0, v0, r3, r5
+	S_VSCR_VRSAVE r0, v0, r3, r8
 
 1:
 
@@ -681,8 +682,8 @@ _CPU_Context_switch_altivec:
 	/* Advance past vrsave/vscr area */
 	addi	r4, r4, PPC_CACHE_ALIGNMENT
 	L_VSCR_VRSAVE r4, r0, v0
-	CMP_BASES r4, r5, r6, r7, r10
-	L_V20TOV31 r4, r5, r6, r7, r10, r11
+	CMP_BASES r4, r8, r6, r7, r10
+	L_V20TOV31 r4, r8, r6, r7, r10, r11
 
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -691,12 +692,12 @@
 
 	.global	_CPU_Context_initialize_altivec
 _CPU_Context_initialize_altivec:
-	CMPOFF	r5
-	add	r3, r3, r5
+	CMPOFF	r8
+	add	r3, r3, r8
 	CACHE_DOWNALGN r3
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	lwz	r5, _CPU_altivec_vrsave_initval@l(r5)
-	stw	r5, VRSAVE_OFF(r3)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	lwz	r8, _CPU_altivec_vrsave_initval@l(r8)
+	stw	r8, VRSAVE_OFF(r3)
 	lis	r6, _CPU_altivec_vscr_initval@ha
 	lwz	r6, _CPU_altivec_vscr_initval@l(r6)
 	stw	r6, VSCR_OFF(r3)
@@ -715,8 +716,8 @@ _CPU_Context_initialize_altivec:
  */
 	.global	_CPU_altivec_set_vrsave_initval
 _CPU_altivec_set_vrsave_initval:
-	lis	r5, _CPU_altivec_vrsave_initval@ha
-	stw	r3, _CPU_altivec_vrsave_initval@l(r5)
+	lis	r8, _CPU_altivec_vrsave_initval@ha
+	stw	r3, _CPU_altivec_vrsave_initval@l(r8)
 	mtvrsave r3
 	blr
 
@@ -771,10 +772,10 @@ _CPU_altivec_load_all:
 	/* Start preloading 2nd line (where first two vectors are) */
 	dcbt	0, r3
 	L_VSCR_VRSAVE r3, r0, v0
-	CMP_BASES r3, r4, r5, r6, r10
+	CMP_BASES r3, r4, r8, r6, r10
 	/* Start preloading 3rd line (where vectors 3 and 4 are) */
-	dcbt	0, r5
-	L_V0TOV31 r3, r4, r5, r6, r10, r11
+	dcbt	0, r8
+	L_V0TOV31 r3, r4, r8, r6, r10, r11
 
 #ifndef IGNORE_VRSAVE
 	mtcr	r9
@@ -794,12 +795,12 @@ _CPU_altivec_save_all:
 	mfcr	r9
 #endif
 
-	PREP_FOR_SAVE r0, r3, r4, r5, r6, r10
+	PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
 	/* r0 now contains VRSAVE, r3 still the aligned memory area
-	 * and r4, r5, r6 are offset by 16, 32, and 48 bytes from r3,
+	 * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
 	 * respectively. r10 holds zero
 	 */
-	S_V0TOV31 _B0=r3, _B1=r4, _B2=r5, _B3=r6, _O1=r10, _O2=r11
+	S_V0TOV31 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
 	mfvscr	v0
 	/* Store vrsave (still in r0) and vscr (in v0) to memory area */
 	S_VSCR_VRSAVE r0, v0, r3, r11
diff --git a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
index a1ec64b8e4..c0e11da076 100644
--- a/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
+++ b/c/src/lib/libcpu/powerpc/new-exceptions/cpu_asm.S
@@ -435,11 +435,9 @@ PROC (_CPU_Context_switch):
 restore_context:
 
 #if defined(__ALTIVEC__) && !defined(PPC_MULTILIB_ALTIVEC)
-	mr	r14, r5
 	mr	r4, r5
 	.extern	_CPU_Context_switch_altivec
 	bl	_CPU_Context_switch_altivec
-	mr	r5, r14
 #endif
 
 	lwz	r1, PPC_CONTEXT_OFFSET_GPR1(r5)
-- 
cgit v1.2.3