From 4fd1ff0f0d8d1e3029f488a011acd83115dccdef Mon Sep 17 00:00:00 2001 From: Sebastian Huber Date: Mon, 26 Mar 2018 06:57:10 +0200 Subject: bsps/powerpc: Move AltiVec support to bsps This patch is a part of the BSP source reorganization. Update #3285. --- .../libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S | 821 --------------------- 1 file changed, 821 deletions(-) delete mode 100644 c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S (limited to 'c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S') diff --git a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S deleted file mode 100644 index 279d1704a7..0000000000 --- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S +++ /dev/null @@ -1,821 +0,0 @@ -#ifdef __ALTIVEC__ - -/* Altivec support for RTEMS; vector register context management. */ - -/* - * Authorship - * ---------- - * This software was created by - * Till Straumann , 2009, - * Stanford Linear Accelerator Center, Stanford University. - * - * Acknowledgement of sponsorship - * ------------------------------ - * This software was produced by - * the Stanford Linear Accelerator Center, Stanford University, - * under Contract DE-AC03-76SFO0515 with the Department of Energy. - * - * Government disclaimer of liability - * ---------------------------------- - * Neither the United States nor the United States Department of Energy, - * nor any of their employees, makes any warranty, express or implied, or - * assumes any legal liability or responsibility for the accuracy, - * completeness, or usefulness of any data, apparatus, product, or process - * disclosed, or represents that its use would not infringe privately owned - * rights. - * - * Stanford disclaimer of liability - * -------------------------------- - * Stanford University makes no representations or warranties, express or - * implied, nor assumes any liability for the use of this software. - * - * Stanford disclaimer of copyright - * -------------------------------- - * Stanford University, owner of the copyright, hereby disclaims its - * copyright and all other rights in this software. Hence, anyone may - * freely use it for any purpose without restriction. - * - * Maintenance of notices - * ---------------------- - * In the interest of clarity regarding the origin and status of this - * SLAC software, this and all the preceding Stanford University notices - * are to remain affixed to any copy or derivative of this software made - * or distributed by the recipient and are to be affixed to any copy of - * software made or distributed by the recipient that contains a copy or - * derivative of this software. - * - * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03 - */ - - -#include - -#ifndef PPC_CACHE_ALIGNMENT -#error "Missing header; PPC_CACHE_ALIGNMENT is not defined" -#endif - -#define ALTIVEC_TESTING - -#if PPC_CACHE_ALIGNMENT != 32 -#error "Altivec support assumes cache-line size is 32 bytes!" 
-#else -#undef LD_PPC_CACHE_ALIGNMENT -#define LD_PPC_CACHE_ALIGNMENT 5 -#endif - - .set v0, 0 - .set v8, 8 - .set v16, 16 - .set v20, 20 - .set v24, 24 - .set v28, 28 - - .set r0, 0 - .set r3, 3 - .set r4, 4 - /* Do not use r5, since this is used by _CPU_Context_switch() */ - .set r6, 6 - .set r7, 7 - .set r8, 8 - .set r9, 9 - .set r10, 10 - .set r11, 11 - /* Do not use r12, since this is used by _CPU_Context_switch() */ - - .set cr5, 5 - - .set VECSIZE, 16 - - .set VRSAVE_INIT_VAL, 0 - .set VSCR_INIT_VAL, 0 - - .set VRSAVE_OFF, 16 - .set VSCR_OFF, 16+12 - - .set ds0, 0 - - /* Block size for dst -- in units of 16-bytes */ - .set BSIZE, 2 /* = 32 bytes */ - .set BCNT, 12/2+1 /* 12 non-volatile registers + area for vscr/vrsave */ - .set BSTRIDE, 32 /* bytes */ - - .data - - .global _CPU_altivec_vrsave_initval -_CPU_altivec_vrsave_initval: - .long 0 - - .global _CPU_altivec_vscr_initval -_CPU_altivec_vscr_initval: - .long 0 - - .text - - .extern _CPU_altivec_psim_cpu - .extern _CPU_altivec_ctxt_off - - .macro CMPOFF _B0 - lis \_B0, _CPU_altivec_ctxt_off@ha - lwz \_B0, _CPU_altivec_ctxt_off@l(\_B0) - .endm - - /* Conditionally load or store a vector _VR to - * EA(_R1|0 + _R2) - * If bit _VR (corresponding to _VR) is set in CRC - * then the load/store is performed but otherwise - * it is skipped. - * If compiled with IGNORE_VRSAVE defined then - * the load/store is done unconditionally. - * - * _OPCODE: intended to be lvx, lvxl, stvx or stvxl - * _VR : target vector register - * _R1 : base register (NOTE: _R1=r0 uses a - * implicit ZERO constant, not the contents - * of r0) for address computation. - * _R2 : 'offset' register for address computation. - * - * MODIFIES: _VR on output if a load operation is performed. - * IMPLICIT USE: CRC (unless compiled with IGNORE_VRSAVE - * defined. - */ - .macro LDST _OPCODE, _VR, _R1, _R2 -#ifndef IGNORE_VRSAVE - bc 4, \_VR, 111f -#endif - \_OPCODE \_VR, \_R1, \_R2 -111: - .endm - - /* - * Load or store four 'adjacent' vector registers. - * - * _OPCODE: intended to be lvx, lvxl, stvx or stvxl - * _VR : target vector register - * _R1 : base register (NOTE: _R1=r0 uses a - * implicit ZERO constant, not the contents - * of r0) for address computation. - * _B0 : base register 0 - * _B1 : base register 1 - * _B2 : base register 2 - * _B3 : base register 3 - * _RO : offset register - * - * memory addresses for _VR, _VR+1, _VR+2, _VR+3 - * are _B0+_RO, _B1+_RO, _B2+_RO, _B3+_RO, respectively. - * - * MODIFIES: _VR, _VR+1, _VR+2, _VR+3 if a load - * operation is performed. - * IMPLICIT USE: see LDST - */ - .macro LDST4 _OPCODE, _VR, _B0, _B1, _B2, _B3, _RO - LDST _OPCODE=\_OPCODE _VR=\_VR+0 _R1=\_B0 _R2=\_RO - LDST _OPCODE=\_OPCODE _VR=\_VR+1 _R1=\_B1 _R2=\_RO - LDST _OPCODE=\_OPCODE _VR=\_VR+2 _R1=\_B2 _R2=\_RO - LDST _OPCODE=\_OPCODE _VR=\_VR+3 _R1=\_B3 _R2=\_RO - .endm - - /* - * Preload/zero two cache lines and save 4 vector registers - * to memory. - * Note that the cache operation targets memory *past* the - * current storage area which should hopefully hit when - * This same code is executed on the next two cache lines... - * - * This code effectively does - * dcbz (_B0 + 64) - * dcbz (_B0 + 64 + 32) - * stvx _VF+0, (_B0+ 0) - * stvx _VF+1, (_B0+16) - * stvx _VF+2, (_B0+32) - * stvx _VF+3, (_B0+48) - * - * _LRU: may be 'l' or empty. The former variant should be - * used when it is conceivable that the memory area is - * unlikely to be used in the near future thus making - * it a candidate for early eviction from the caches. 
- * - * If it is likely that the memory area is reused soon - * (e.g., save/restore across ISR execution) then the - * 'stvx' opcode (w/o 'l' suffix) should be used. - * - * _VR: first of four target vector registers; _VR+0, - * _VR+1, _VR+2, _VR+3 are saved. - * - * _BO: base address of memory area. - * _B1: should contain _B0+16 on entry - * _B2: should contain _B0+32 on entry - * _B3: should contain _B0+48 on entry - * - * _O1: contains the offset where the four vectors are - * stored. - * _VR -> (_B0 + _O1) = (_B0 + _O1 + 0 ) - * _VR+1-> (_B1 + _O1) = (_B0 + _O1 + 16 ) - * _VR+2-> (_B2 + _O1) = (_B0 + _O1 + 32 ) - * _VR+3-> (_B3 + _O1) = (_B0 + _O1 + 48 ) - * _O2: is set to _O1 + 64 by this macro. Hence _O2 is - * used to address the two cache-lines past the - * current memory area. - * - * MODIFIES: _O2; contains _O1 + 64 after execution of this - * code. - * - * NOTES: a different set of four vectors can be addressed - * simply by changing the one offset register _O1. - * - * Saving more than 4 registers can simply be - * achieved by expanding this macro multiple - * times with _O1 and _O2 swapped (new _O1 - * becomes _O2 = old _O1 + 64) thus stepping - * through the memory area. - * - */ - .macro S4VEC_P _LRU, _VR, _B0, _B1, _B2, _B3, _O1, _O2 - addi \_O2, \_O1, 2*PPC_CACHE_ALIGNMENT - dcbz \_B0, \_O2 - dcbz \_B2, \_O2 - LDST4 _OPCODE=stvx\_LRU _VR=\_VR _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _RO=\_O1 - .endm - - /* - * Save eight vector registers by expanding S4VEC_P twice. - * See notes for S4VEC_P above. - * - * INPUTS: _B0, _B1, _B2, _B3, _B3, _O1 must be preloaded (see above) - * - * MODIFIES: After execution, - * _O2 contains original _O1 + 64, - * _O1 contains original _O1 + 128 - * - * NOTES: Expanding this macro multiple times lets you save - * multiple blocks of 8 registers (no reload of _Bx / _Ox is needed). - */ - .macro S8VEC_P _LRU, _VR, _B0, _B1, _B2, _B3, _O1, _O2 - S4VEC_P \_LRU _VR=\_VR+0 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2 - /* Note that the roles of _O1 and _O2 are swapped here */ - S4VEC_P \_LRU _VR=\_VR+4 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O2 _O2=\_O1 - .endm - - /* - * Save volatile vector registers v0..v19 to memory area starting at (_B0 + _O1) - * - * See notes above (for S4VEC_P). - * - * INPUTS: _B0, _B1, _B2, _B3, _B3, _O1 must be preloaded (see above) - * MODIFIES: _O1 contains original _O1 + 256 - * _O2 contains original _O1 + 256 - 64 - */ - .macro S_V0TOV19 _LRU, _B0, _B1, _B2, _B3, _O1, _O2 - S8VEC_P \_LRU _VR=v0 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2 - S8VEC_P \_LRU _VR=v8 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2 - LDST4 stvx\_LRU _VR=v16 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _RO=\_O1 - .endm - - /* - * Save non-volatile vector registers v20..v31 to memory area starting at (_B0 + _O1) - * - * See notes above (for S4VEC_P, S_V0TOV19). 
- * - * INPUTS: _B0, _B1, _B2, _B3, _B3, _O1 must be preloaded (see above) - * MODIFIES: _O1 contains original _O1 + 128 - * _O2 contains original _O1 + 128 - 64 - */ - .macro S_V20TOV31 _LRU, _B0, _B1, _B2, _B3, _O1, _O2 - S8VEC_P \_LRU _VR=v20 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2 - LDST4 stvx\_LRU v28 \_B0 \_B1 \_B2 \_B3 \_O1 - .endm - - /* - * Save all registers to memory area - * - * INPUTS: _B0, _B1, _B2, _B3, _B3, _O1 must be preloaded (see above) - * MODIFIES: _O1 contains original _O1 + 512 - * _O2 contains original _O1 + 512 - 64 - */ - .macro S_V0TOV31 _B0, _B1, _B2, _B3, _O1, _O2 - S8VEC_P l v0 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2 - S8VEC_P l v8 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2 - S8VEC_P l v16 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2 - S4VEC_P l v24 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2 - LDST4 stvxl v28 \_B0 \_B1 \_B2 \_B3 \_O2 - .endm - - - /* - * Macros that expand to 'dcbt _RA, _RB' or nothing, respectively. - * We can pass either of them as arguments to another macro which - * allows us to decide if the main macro uses dcbt or not when - * we expand it... - */ - .macro DO_DCBT _RA, _RB - dcbt \_RA, \_RB - .endm - - .macro NO_DCBT _RA, _RB - .endm - - /* - * NOTE REGARDING dcbt VS dst - * - * Preloading the cache with memory areas that we soon need - * can be done either using 'dcbt' or 'dst' instructions - * "ahead of time". - * When experimenting (on a mpc7457) I found that the 'dst' - * stream instruction was very efficient if there is enough - * time to read ahead. It works well when we do a context - * switch: - * - * 1) start DST on new context to be loaded - * 2) save old context to memory - * 3) load new context from memory - * - * Because of the interleaved step 2) dst works nicely and - * 3) finds what it needs in the cache. - * - * However, in a situation when there is not much time - * to start the DST, e.g., because we want to restore - * a context out of the blue (e.g., after returning - * from and ISR): - * - * 1) save volatile registers to memory/stack - * 2) execute ISR - * 3) might do a task context switch - * 4) when returned to old task context then - * reload volatile registers from memory/stack. - * - * In this situation, preloading the target memory before - * or after step 1) makes obviously no sense because after - * 1) the registers area is most likely in the cache already. - * - * Starting preload after 2) doesn't make much sense either. - * If ISR doesn't lead to a context switch then it is quite - * likely that the register area is still in the cache. - * OTOTH, if a context switch happens then the preload after 2) - * might be useless. - * - * This leaves us at step 4) where we want to load immediately. - * In this case, I found that 'dcbt' works more efficiently - * so that's what we use when restoring volatile registers. - * - * When restoring the non-volatile VRs during a 'normal' - * context switch then we shall use DST (and no dcbt). - */ - - /* - * Symmetric to S4VEC_P above but addresses loading four - * vector registers from memory. - * - * Touches two cache lines past the current memory area - * and loads four vectors from the current area. - * - * Optionally, the DCBT operation may be omitted - * (when expanding with _DCBT=NO_DCBT). - * This is useful if the cache was already preloaded - * by another means (dst instruction). - * - * NOTE: We always use the 'LRU' form of lvx: lvxl, - * because we deem it unlikely that the context - * that was just loaded has to be saved again - * to memory in the immediate future. 
- * - * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded - * as explained above. - * - * MODIFIES: _O2 contains original _O1 + 64. - * _VR.._VR+3 loaded from memory. - */ - .macro L4VEC_A _DCBT, _VR, _B0, _B1, _B2, _B3, _O1, _O2 - addi \_O2, \_O1, 2*PPC_CACHE_ALIGNMENT - /* preload/touch 2 lines at offset 64 from _B0 */ - \_DCBT \_B0, \_O2 - \_DCBT \_B2, \_O2 - /* load four vectors at off set 0 from _B0 */ - LDST4 lvxl, \_VR, \_B0, \_B1, \_B2, \_B3, \_O1 - .endm - - /* - * Symmetric to S8VEC_P; loads 8 vector registers - * from memory -- see comments above... - * - * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded - * as explained above. - * - * MODIFIES: _O1 contains original _O1 + 128. - * _O2 contains original _O1 + 64. - * _VR.._VR+7 loaded from memory. - */ - .macro L8VEC_A _DCBT, _VR, _B0, _B1, _B2, _B3, _O1, _O2 - L4VEC_A \_DCBT, \_VR+0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - L4VEC_A \_DCBT, \_VR+4, \_B0, \_B1, \_B2, \_B3, \_O2, \_O1 - .endm - - /* - * Load volatile vector registers v0..v19 employing - * the DCBT to preload the cache. The rationale for - * using DCBT here but not when restoring non-volatile - * registers is explained above, see - * - * "NOTE REGARDING dcbt VS dst" - * - * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded - * as explained above. - * - * MODIFIES: _O1 contains original _O1 + 256. - * _O2 contains original _O1 + 256 - 64. - * VR0..VR19 loaded from memory. - */ - .macro L_V0TOV19 _B0, _B1, _B2, _B3, _O1, _O2 - L8VEC_A DO_DCBT, v0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - L8VEC_A DO_DCBT, v8, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - LDST4 lvxl, v16, \_B0, \_B1, \_B2, \_B3, \_O1 - .endm - - /* - * Load non-volatile vector registers v20..v31. - * Note that no DCBT is performed since we use - * DST for preloading the cache during a context - * switch, see - * - * "NOTE REGARDING dcbt VS dst" - * - * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded - * as explained above. - * - * MODIFIES: _O1 contains original _O1 + 128. - * _O2 contains original _O1 + 128 - 64. - * VR20..VR31 loaded from memory. - */ - .macro L_V20TOV31 _B0, _B1, _B2, _B3, _O1, _O2 - L8VEC_A NO_DCBT, v20, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - LDST4 lvxl, v28, \_B0, \_B1, \_B2, \_B3, \_O1 - .endm - - /* - * Load all registers from memory area. - */ - .macro L_V0TOV31 _B0, _B1, _B2, _B3, _O1, _O2 - L8VEC_A DO_DCBT, v0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - L8VEC_A DO_DCBT, v8, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - L8VEC_A DO_DCBT, v16, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - L4VEC_A DO_DCBT, v24, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2 - LDST4 lvxl, v28, \_B0, \_B1, \_B2, \_B3, \_O2 - .endm - - /* - * Compute - * _B1 = _B0 + 16 - * _B2 = _B0 + 32 - * _B3 = _B0 + 48 - * and load - * _RO = 0 - * - * convenience macro to be expanded before - * any of the load/store macros that use - * four base addresses etc. - * - * INPUT: _B0 = cache-aligned start of memory area - * - * MODIFIES: _B1, _B2, _B3, _RO as described above. - */ - .macro CMP_BASES _B0, _B1, _B2, _B3, _RO - addi \_B1, \_B0, 1*VECSIZE - addi \_B2, \_B0, 2*VECSIZE - addi \_B3, \_B0, 3*VECSIZE - li \_RO, 0 - .endm - - /* - * Prepare for saving general vector registers. - * - * If not built with #define IGNORE_VRSAVE then - * - * 1) copy vrsave to CRC - * - * endif - * - * 2) copy vrsave to _VRSAVE_REG - * 3) preload/zero cache line where vrsave and vscr are stored. - * 4) compute base adresses from _B0 - * 5) preload/zero first two cache lines (remember that the - * first S8VEC_P starts preloading/zeroing at offset 64). 
- * - * INPUT: 'vrsave' register, _B0 (base address of memory area) - * MODIFIES: _VRSAVE_REG (holds contents of 'vrsave') - * _B0 = original _BO + 32 - * _B1 = original _B0 + 32 + 16, - * _B2 = original _B0 + 32 + 32, - * _B3 = original _B0 + 32 + 48, - * CRC = 'vrsave' (ONLY IF COMPILED with IGNORE_VRSAVE undefined) - */ - .macro PREP_FOR_SAVE _VRSAVE_REG, _B0, _B1, _B2, _B3, _RO - mfvrsave \_VRSAVE_REG -#ifndef IGNORE_VRSAVE - mtcr \_VRSAVE_REG -#endif - dcbz 0, \_B0 - addi \_B0, \_B0, PPC_CACHE_ALIGNMENT - dcbz 0, \_B0 - CMP_BASES \_B0, \_B1, \_B2, \_B3, \_RO - dcbz 0, \_B2 - .endm - - /* - * Store _VRSAVE_REG and _VSCR_VREG to memory. These registers - * must have been loaded from 'vrsave' and 'vscr', respectively, - * prior to expanding this macro. - * - * INPUTS: _VRSAVE_REG GPR holding 'vrsave' contents - * _VSCR_VREG VR holding 'vscr' contents - * _B0 cache-aligned (base) address of memory area. - * MODIFIES: _SCRATCH_REG - */ - .macro S_VSCR_VRSAVE _VRSAVE_REG, _VSCR_VREG, _B0, _SCRATCH_REG - stw \_VRSAVE_REG, - PPC_CACHE_ALIGNMENT + VRSAVE_OFF(\_B0) - li \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VSCR_OFF - stvewx \_VSCR_VREG, \_B0, \_SCRATCH_REG - .endm - - /* - * Load 'vrsave' and 'vscr' from memory. - * - * INPUTS: _B0 cache-aligned (base) address of memory area. - * MODIFIES: _SCRATCH_REG (gpr), _SCRATCH_VREG (vr) - * 'vscr', 'vrsave'. - * CRC (holds contents of 'vrsave') (ONLY IF COMPILED - * with IGNORE_VRSAVE undefined). - */ - .macro L_VSCR_VRSAVE _B0, _SCRATCH_REG, _SCRATCH_VREG - lwz \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VRSAVE_OFF(\_B0) - mtvrsave \_SCRATCH_REG -#ifndef IGNORE_VRSAVE - mtcr \_SCRATCH_REG -#endif - li \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VSCR_OFF - lvewx \_SCRATCH_VREG, \_B0, \_SCRATCH_REG - mtvscr \_SCRATCH_VREG - .endm - - /* - * _B0 &= ~ (PPC_CACHE_ALIGNMENT - 1) - * - * INPUT: _B0 - * MODIFIES: _B0 (as stated above) - */ - .macro CACHE_DOWNALGN _B0 - rlwinm \_B0, \_B0, 0, 0, 31-LD_PPC_CACHE_ALIGNMENT - .endm - - .text - - .global _CPU_save_altivec_volatile -_CPU_save_altivec_volatile: - /* Align address up to next cache-line boundary */ - addi r3, r3, PPC_CACHE_ALIGNMENT - 1 - CACHE_DOWNALGN r3 - -#ifndef IGNORE_VRSAVE - /* Save CRC -- it is used implicitly by all the LOAD/STORE macros - * when testing if we really should do the load/store operation. - */ - mfcr r9 -#endif - - PREP_FOR_SAVE r0, r3, r4, r8, r6, r10 - /* r0 now contains VRSAVE, r3 still the aligned memory area - * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3, - * respectively. r10 holds zero - */ - S_V0TOV19 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11 - mfvscr v0 - /* Store vrsave (still in r0) and vscr (in v0) to memory area */ - S_VSCR_VRSAVE r0, v0, r3, r11 - -#ifndef IGNORE_VRSAVE - /* Restore CRC */ - mtcr r9 -#endif - blr - - .global _CPU_load_altivec_volatile -_CPU_load_altivec_volatile: - /* Align address up to next cache-line boundary */ - addi r3, r3, PPC_CACHE_ALIGNMENT - 1 - CACHE_DOWNALGN r3 -#ifndef IGNORE_VRSAVE - /* Save CRC -- it is used implicitly by all the LOAD/STORE macros - * when testing if we really should do the load/store operation. 
- */ - mfcr r9 -#endif - - /* Try to preload 1st line (where vscr and vrsave are stored) */ - dcbt 0, r3 - /* Point to start of general vector-register area */ - addi r3, r3, PPC_CACHE_ALIGNMENT - /* Start preloading 2nd line (where first two vectors are) */ - dcbt 0, r3 - L_VSCR_VRSAVE r3, r0, v0 - CMP_BASES r3, r4, r8, r6, r10 - /* Start preloading 3rd line (where vectors 3 and 4 are) */ - dcbt 0, r8 - L_V0TOV19 r3, r4, r8, r6, r10, r11 - -#ifndef IGNORE_VRSAVE - mtcr r9 -#endif - blr - - .global _CPU_Context_switch_altivec -_CPU_Context_switch_altivec: - - /* fetch offset of altivec area in context */ - CMPOFF r8 - /* down-align 'to' area to cache-line boundary */ - add r4, r4, r8 - CACHE_DOWNALGN r4 - - /* Check for PSIM */ - lis r6, _CPU_altivec_psim_cpu@ha - lwz r6, _CPU_altivec_psim_cpu@l(r6) - cmpli 0, r6, 0 - bne 1f - /* Skip data-stream instructions on PSIM (not implemented) */ - dssall - /* Pre-load new context into cache */ - lis r6, (BSIZE<<(24-16)) | (BCNT<<(16-16)) - ori r6, r6, BSTRIDE - dstt r4, r6, ds0 -1: - -#ifndef IGNORE_VRSAVE - /* Save CRC -- it is used implicitly by all the LOAD/STORE macros - * when testing if we really should do the load/store operation. - */ - mfcr r9 -#endif - - /* Is 'from' context == NULL ? (then we just do a 'restore') */ - cmpli 0, r3, 0 - beq 1f /* yes: skip saving 'from' context */ - - /* SAVE NON-VOLATILE REGISTERS */ - - /* Compute aligned destination pointer (r8 still holds offset - * to 'altivec' area in context) - */ - add r3, r3, r8 - CACHE_DOWNALGN r3 - - PREP_FOR_SAVE r0, r3, r8, r6, r7, r10 - /* The manual says reading vscr can take some time - do - * read it here (into a volatile vector register) while - * we wait for cache blocks to be allocated - */ - mfvscr v0 - S_V20TOV31 _LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11 - /* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */ - S_VSCR_VRSAVE r0, v0, r3, r8 - -1: - - /* LOAD NON-VOLATILE REGISTERS */ - - /* Advance past vrsave/vscr area */ - addi r4, r4, PPC_CACHE_ALIGNMENT - L_VSCR_VRSAVE r4, r0, v0 - CMP_BASES r4, r8, r6, r7, r10 - L_V20TOV31 r4, r8, r6, r7, r10, r11 - -#ifndef IGNORE_VRSAVE - mtcr r9 -#endif - blr - - .global _CPU_Context_initialize_altivec -_CPU_Context_initialize_altivec: - CMPOFF r8 - add r3, r3, r8 - CACHE_DOWNALGN r3 - lis r8, _CPU_altivec_vrsave_initval@ha - lwz r8, _CPU_altivec_vrsave_initval@l(r8) - stw r8, VRSAVE_OFF(r3) - lis r6, _CPU_altivec_vscr_initval@ha - lwz r6, _CPU_altivec_vscr_initval@l(r6) - stw r6, VSCR_OFF(r3) - blr - - /* - * Change the initial value of VRSAVE. - * Can be used by initialization code if - * it is determined that code was compiled - * with -mvrsave=no. In this case, VRSAVE - * must be set to all-ones which causes this - * support code to save/restore *all* registers - * (only has an effect if IGNORE_VRSAVE is - * not defined -- otherwise all registers are - * saved/restored anyways). - */ - .global _CPU_altivec_set_vrsave_initval -_CPU_altivec_set_vrsave_initval: - lis r8, _CPU_altivec_vrsave_initval@ha - stw r3, _CPU_altivec_vrsave_initval@l(r8) - mtvrsave r3 - blr - -#ifdef ALTIVEC_TESTING - .global msr_VE_on -msr_VE_on: - mfmsr r3 - oris r3, r3, 1<<(31-6-16) - mtmsr r3 - blr - - .global msr_VE_off -msr_VE_off: - mfmsr r3 - lis r4, 1<<(31-6-16) - andc r3, r3, r4 - mtmsr r3 - blr - - - .global mfvrsave -mfvrsave: - mfvrsave r3 - blr - - .global mtvrsave -mtvrsave: - mtvrsave r3 - blr - - /* Load all vector registers from memory area. 
- * NOTE: This routine is not strictly ABI compliant -- - * it guarantees that volatile vector registers - * have certain values on exit! - */ - .global _CPU_altivec_load_all -_CPU_altivec_load_all: - /* Align address up to next cache-line boundary */ - addi r3, r3, PPC_CACHE_ALIGNMENT - 1 - CACHE_DOWNALGN r3 -#ifndef IGNORE_VRSAVE - /* Save CRC -- it is used implicitly by all the LOAD/STORE macros - * when testing if we really should do the load/store operation. - */ - mfcr r9 -#endif - - /* Try to preload 1st line (where vscr and vrsave are stored) */ - dcbt 0, r3 - /* Point to start of general vector-register area */ - addi r3, r3, PPC_CACHE_ALIGNMENT - /* Start preloading 2nd line (where first two vectors are) */ - dcbt 0, r3 - L_VSCR_VRSAVE r3, r0, v0 - CMP_BASES r3, r4, r8, r6, r10 - /* Start preloading 3rd line (where vectors 3 and 4 are) */ - dcbt 0, r8 - L_V0TOV31 r3, r4, r8, r6, r10, r11 - -#ifndef IGNORE_VRSAVE - mtcr r9 -#endif - blr - - .global _CPU_altivec_save_all -_CPU_altivec_save_all: - /* Align address up to next cache-line boundary */ - addi r3, r3, PPC_CACHE_ALIGNMENT - 1 - CACHE_DOWNALGN r3 - -#ifndef IGNORE_VRSAVE - /* Save CRC -- it is used implicitly by all the LOAD/STORE macros - * when testing if we really should do the load/store operation. - */ - mfcr r9 -#endif - - PREP_FOR_SAVE r0, r3, r4, r8, r6, r10 - /* r0 now contains VRSAVE, r3 still the aligned memory area - * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3, - * respectively. r10 holds zero - */ - S_V0TOV31 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11 - mfvscr v0 - /* Store vrsave (still in r0) and vscr (in v0) to memory area */ - S_VSCR_VRSAVE r0, v0, r3, r11 - -#ifndef IGNORE_VRSAVE - /* Restore CRC */ - mtcr r9 -#endif - blr - - -#if 0 - .gnu_attribute 4,1 - .gnu_attribute 8,1 -#endif - -#endif -#endif -- cgit v1.2.3
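
Note on the save-area layout assumed throughout the file above: the pointer is rounded to a 32-byte cache-line boundary (up in _CPU_save_altivec_volatile, _CPU_load_altivec_volatile, _CPU_altivec_save_all and _CPU_altivec_load_all; down for the in-context area in _CPU_Context_switch_altivec), the first cache line holds VRSAVE at offset 16 and the low word of VSCR at offset 28 (via stvewx), and the vector registers follow at a 16-byte stride starting on the next cache line. Below is a minimal C sketch of that layout, derived only from the constants in the deleted file (VECSIZE = 16, VRSAVE_OFF = 16, VSCR_OFF = 16 + 12, PPC_CACHE_ALIGNMENT = 32); the struct and field names are hypothetical and purely illustrative, not part of the RTEMS sources.

#include <stdint.h>

#define PPC_CACHE_ALIGNMENT 32

/* One header cache line followed by 16-byte vector-register slots. */
typedef struct {
    uint8_t  pad0[16];   /* first half of the header line (zeroed by dcbz)  */
    uint32_t vrsave;     /* VRSAVE_OFF = 16                                 */
    uint8_t  pad1[8];
    uint32_t vscr;       /* VSCR_OFF = 16 + 12 = 28; low word of VSCR       */
    /* Vector-register slots, 16 bytes each:
     *   _CPU_save_altivec_volatile   stores v0..v19  (20 slots),
     *   _CPU_Context_switch_altivec  stores v20..v31 (12 slots),
     *   _CPU_altivec_save_all/_CPU_altivec_load_all use all 32 slots.
     */
    uint8_t  vr[32][16];
} altivec_save_area;     /* hypothetical name, for illustration only */

/* Because the volatile and save_all/load_all entry points round the
 * caller's pointer up to the next cache-line boundary before use, a raw
 * buffer handed to them would need to be oversized accordingly, e.g.:
 *
 *   uint8_t buf[sizeof(altivec_save_area) + PPC_CACHE_ALIGNMENT - 1];
 */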