author     Sebastian Huber <sebastian.huber@embedded-brains.de>  2018-03-26 06:57:10 +0200
committer  Sebastian Huber <sebastian.huber@embedded-brains.de>  2018-03-26 10:40:49 +0200
commit     4fd1ff0f0d8d1e3029f488a011acd83115dccdef (patch)
tree       5b7f64447e19e8e98b687c9a472963decf8b3ded /c/src/lib/libcpu
parent     bsps/powerpc: Move dec clock driver to bsps (diff)
download   rtems-4fd1ff0f0d8d1e3029f488a011acd83115dccdef.tar.bz2
bsps/powerpc: Move AltiVec support to bsps
This patch is part of the BSP source reorganization. Update #3285.
Diffstat (limited to 'c/src/lib/libcpu')
-rw-r--r--  c/src/lib/libcpu/powerpc/Makefile.am                    9
-rw-r--r--  c/src/lib/libcpu/powerpc/mpc6xx/altivec/README        184
-rw-r--r--  c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup.c     273
-rw-r--r--  c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S 821
4 files changed, 0 insertions, 1287 deletions
diff --git a/c/src/lib/libcpu/powerpc/Makefile.am b/c/src/lib/libcpu/powerpc/Makefile.am
index 1c15f2cc59..9d19f0e13d 100644
--- a/c/src/lib/libcpu/powerpc/Makefile.am
+++ b/c/src/lib/libcpu/powerpc/Makefile.am
@@ -39,13 +39,4 @@ if ppc405
## ppc4xx/include
endif # ppc405
-if mpc6xx
-# mpc6xx/altivec
-noinst_PROGRAMS += mpc6xx/altivec.rel
-mpc6xx_altivec_rel_SOURCES = mpc6xx/altivec/vec_sup.c mpc6xx/altivec/vec_sup_asm.S
-mpc6xx_altivec_rel_CPPFLAGS = $(AM_CPPFLAGS)
-mpc6xx_altivec_rel_LDFLAGS = $(RTEMS_RELLDFLAGS)
-endif
-EXTRA_DIST += mpc6xx/altivec/README
-
include $(top_srcdir)/../../../automake/local.am
diff --git a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/README b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/README
deleted file mode 100644
index 61ebb8dded..0000000000
--- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/README
+++ /dev/null
@@ -1,184 +0,0 @@
-RTEMS ALTIVEC SUPPORT
-=====================
-
-1. History
-----------
-
-Altivec support was developed and maintained as a user-extension
-outside of RTEMS. This extension is still available (unbundled)
-from Till Straumann <strauman@slac.stanford.edu>; it is useful
-if an application desires 'lazy switching' of the altivec context.
-
-2. Modes
---------
-
-Altivec support -- the unbundled extension, that is -- can be used
-in two ways:
-
-a. All tasks are implicitly AltiVec-enabled.
-
-b. Only designated tasks are AltiVec-enabled. 'Lazy context switching'
-   is implemented to switch the AltiVec context.
-
-Note that the code implemented in this directory supports mode 'a'
-and mode 'a' ONLY. For mode 'b' you need the unbundled extension
-(which is completely independent of this code).
-
-Mode 'a' (All tasks are AltiVec-enabled)
-- - - - - - - - - - - - - - - - - - - - -
-
-The major disadvantage of this mode is that additional overhead is
-involved: tasks that never use the vector unit still save/restore the
-volatile vector registers (20 registers * 16 bytes each = 320 bytes)
-across every interrupt and all non-volatile registers (12 registers *
-16 bytes each = 192 bytes) during every context switch.
-
-However, saving/restoring the volatile registers, for example, is quite
-fast -- on my 1 GHz 7457 saving or restoring 20 vector registers
-takes only about 1 us or even less (if there are cache hits).
-
-The advantage is complete transparency to the user and full ABI
-compatibility (except for ISRs and exception handlers); see below.
-
-Mode 'b' (Only dedicated tasks are AltiVec-enabled)
-- - - - - - - - - - - - - - - - - - - - - - - - - -
-
-The advantage of this mode of operation is that the vector registers
-are only saved/restored when a different, AltiVec-enabled task becomes
-ready to run. In particular, if there is only a single AltiVec-enabled
-task then the AltiVec context is *never* switched.
-
-Note that this mode of operation is not supported by the code
-in this directory -- you need the unbundled altivec extension
-mentioned above.
-
-3. Compiler Options
--------------------
-
-Three compiler options affect AltiVec: -maltivec, -mabi=altivec and
--mvrsave=yes/no.
-
--maltivec: This lets the cpp define the symbol __ALTIVEC__ and enables
-    gcc to emit vector instructions. Note that gcc may use the
-    AltiVec engine implicitly, i.e., **without you writing any
-    vectorized code** (an example follows this list of options).
-
--mabi=altivec: This option has two effects:
- i) It ensures 16-byte stack alignment required by AltiVec
- (even in combination with eabi which is RTEMS' default).
- ii) It allows vector arguments to be passed in vector registers.
-
--mvrsave=yes/no: Instructs gcc to emit code which sets the VRSAVE register
-    indicating which vector registers are 'currently in use'.
-    Because the AltiVec support does not use this information *), the
-    option has no direct effect, but it is desirable to compile with
-    -mvrsave=no so that no unnecessary code is generated.
-
- *) The file vec_sup_asm.S conditionally disables usage of
- the VRSAVE information if the preprocessor symbol
- 'IGNORE_VRSAVE' is defined, which is the default.
-
- If 'IGNORE_VRSAVE' is undefined then the code *does*
- use the VRSAVE information but I found that this does
- not execute noticeably faster.
-
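As a rough illustration of the -maltivec / __ALTIVEC__ behaviour described
above (this snippet is not part of the original README and the helper is
hypothetical), application code can guard a vector path with the
preprocessor symbol and use the same GCC vector-extension syntax that
vec_sup.c uses:

  #include <stdint.h>
  #include <string.h>

  #ifdef __ALTIVEC__
  /* 16-byte wide GCC vector type, as in vec_sup.c */
  typedef uint32_t v4u32 __attribute__((vector_size(16)));

  /* Hypothetical helper: element-wise add of four 32-bit words.
   * With -maltivec gcc can turn this into a single AltiVec add.
   */
  static void add4(uint32_t d[4], const uint32_t a[4], const uint32_t b[4])
  {
    v4u32 va, vb;
    memcpy(&va, a, sizeof(va));
    memcpy(&vb, b, sizeof(vb));
    va = va + vb;
    memcpy(d, &va, sizeof(va));
  }
  #else
  /* Scalar fallback when AltiVec code generation is disabled */
  static void add4(uint32_t d[4], const uint32_t a[4], const uint32_t b[4])
  {
    int i;
    for (i = 0; i < 4; i++)
      d[i] = a[i] + b[i];
  }
  #endif

Remember that with -maltivec gcc may also vectorize ordinary code on its
own, which is exactly why mode 'a' treats every task as AltiVec-enabled.
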
-IMPORTANT NOTES
-===============
-
-AFAIK, RTEMS uses the EABI, which requires a stack alignment of only 8
-bytes; this is NOT enough for AltiVec, which requires 16-byte alignment.
-
-There are two ways for obtaining 16-byte alignment:
-
-I) Compile with -mno-eabi (ordinary SYSV ABI has 16-byte alignment)
-II) Compile with -mabi=altivec (extension to EABI; maintains 16-byte alignment
- but also allows for passing vector arguments in vector registers)
-
-Note that it is crucial to compile ***absolutely everything*** with the same
-ABI options (or a linker error may occur). In particular, this includes
-
- - newlib multilib variant
- - RTEMS proper
- - application + third-party code
-
-IMO the proper compiler options for Mode 'a' would be
-
- -maltivec -mabi=altivec -mvrsave=no
-
-Note that the -mcpu=7400 option also enables -maltivec and -mabi=altivec
-but leaves -mvrsave at some 'default' which is probably 'no'.
-Compiling with -mvrsave=yes does not produce incompatible code but
-may have a performance impact (since extra code is produced to maintain
-VRSAVE).
-
-4. Multilib Variants
---------------------
-
-The default GCC configuration for RTEMS contains a -mcpu=7400 multilib
-variant which is the correct one to choose.
-
-5. BSP 'custom' file.
----------------------
-
-Now that you have the necessary newlib and libgcc etc. variants
-you also need to build RTEMS accordingly.
-
-In your BSP's make/custom/<bsp>.cfg file make sure CPU_CFLAGS
-selects the desired variant:
-
-for mode 'a':
-
- CPU_CFLAGS = ... -mcpu=7400
-
-Note that since -maltivec globally defines __ALTIVEC__, RTEMS automatically
-enables code that takes care of switching the AltiVec context as necessary.
-This is transparent to application code.
-
-6. BSP support
---------------
-
-It is the BSP's responsibility to initialize MSR_VE, VSCR and VRSAVE
-during early boot, ideally before any C-code is executed (because it
-may, theoretically, use vector instructions).
-
-The BSP must (a brief sketch follows the list)
-
- - set MSR_VE
- - clear VRSAVE; note that the probing algorithm for detecting
- whether -mvrsave=yes or 'no' was used relies on the BSP
- clearing VRSAVE during early start. Since no interrupts or
- context switches happen before the AltiVec support is initialized
- clearing VRSAVE is no problem even if it turns out that -mvrsave=no
- was in effect (eventually a value of all-ones will be stored
- in VRSAVE in this case).
- - clear VSCR
-
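A minimal sketch of these three steps in C is shown below (it is not part
of the original README; the function name is made up, and a real BSP would
typically perform these steps in its assembly start code before any C
runs). It reuses the MSR access macros and the MSR_VE fallback definition
that appear in vec_sup.c:

  #include <stdint.h>
  #include <rtems/score/cpu.h>   /* _CPU_MSR_GET / _CPU_MSR_SET, as used in vec_sup.c */

  #ifndef MSR_VE
  #define MSR_VE (1 << (31 - 6)) /* vector-enable bit; same fallback as vec_sup.c */
  #endif

  /* Hypothetical early-boot hook -- not an actual RTEMS interface */
  static void bsp_early_altivec_setup(void)
  {
    uint32_t msr;

    /* set MSR_VE so that vector instructions no longer trap */
    _CPU_MSR_GET(msr);
    _CPU_MSR_SET(msr | MSR_VE);
    __asm__ volatile("isync");

    /* clear VRSAVE; the -mvrsave probing described above relies on this */
    __asm__ volatile("mtvrsave %0" : : "r"(0));

    /* clear VSCR: zero v0 with vxor, then copy it into VSCR */
    __asm__ volatile("vxor 0, 0, 0; mtvscr 0" : : : "v0");
  }

MSR_VE is set first because the VSCR access itself is a vector operation
and would trap while MSR[VE] is still clear; clobbering v0 is harmless this
early because no vector context exists yet.
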
-7. PSIM note
-------------
-
-PSIM supports the AltiVec instruction set with the exception of
-the 'data stream' instructions for cache prefetching. The RTEMS
-AltiVec support includes run-time checks to skip these instructions
-when executing on PSIM.
-
-Note that AltiVec support within PSIM must be enabled at 'configure'
-time by passing the 'configure' option
-
---enable-sim-float=altivec
-
-Note also that PSIM's AltiVec support has many bugs. It is recommended
-to apply the patches filed as an attachment with gdb bug report #2461
-prior to building PSIM.
-
-The CPU type and corresponding multilib must be changed when
-building RTEMS/psim:
-
- edit make/custom/psim.cfg and change
-
- CPU_CFLAGS = ... -mcpu=603e
-
- to
-
- CPU_CFLAGS = ... -mcpu=7400
-
-This change must be performed *before* configuring RTEMS/psim.
diff --git a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup.c b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup.c
deleted file mode 100644
index 141779c175..0000000000
--- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup.c
+++ /dev/null
@@ -1,273 +0,0 @@
-/* Altivec support for RTEMS; vector register context management. */
-
-/*
- * Authorship
- * ----------
- * This software was created by
- * Till Straumann <strauman@slac.stanford.edu>, 2009,
- * Stanford Linear Accelerator Center, Stanford University.
- *
- * Acknowledgement of sponsorship
- * ------------------------------
- * This software was produced by
- * the Stanford Linear Accelerator Center, Stanford University,
- * under Contract DE-AC03-76SFO0515 with the Department of Energy.
- *
- * Government disclaimer of liability
- * ----------------------------------
- * Neither the United States nor the United States Department of Energy,
- * nor any of their employees, makes any warranty, express or implied, or
- * assumes any legal liability or responsibility for the accuracy,
- * completeness, or usefulness of any data, apparatus, product, or process
- * disclosed, or represents that its use would not infringe privately owned
- * rights.
- *
- * Stanford disclaimer of liability
- * --------------------------------
- * Stanford University makes no representations or warranties, express or
- * implied, nor assumes any liability for the use of this software.
- *
- * Stanford disclaimer of copyright
- * --------------------------------
- * Stanford University, owner of the copyright, hereby disclaims its
- * copyright and all other rights in this software. Hence, anyone may
- * freely use it for any purpose without restriction.
- *
- * Maintenance of notices
- * ----------------------
- * In the interest of clarity regarding the origin and status of this
- * SLAC software, this and all the preceding Stanford University notices
- * are to remain affixed to any copy or derivative of this software made
- * or distributed by the recipient and are to be affixed to any copy of
- * software made or distributed by the recipient that contains a copy or
- * derivative of this software.
- *
- * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
- */
-
-#ifdef __ALTIVEC__
-
-#include <rtems.h>
-#include <libcpu/cpuIdent.h>
-#include <rtems/bspIo.h>
-#include <rtems/error.h>
-#include <rtems/score/cpu.h>
-#include <rtems/powerpc/powerpc.h>
-
-#define STATIC static
-
-#define VEC_ALIGNMENT 16
-
-#define NAM "AltiVec Support"
-#define ERRID(a,b,c,d) (((a)<<24) | ((b)<<16) | ((c)<<8) | (d))
-
-typedef uint32_t _vu32 __attribute__((vector_size(VEC_ALIGNMENT)));
-
-#ifndef MSR_VE
-#define MSR_VE (1<<(31-6))
-#endif
-
-/* NOTE: These two variables are accessed by assembly code
- * which assumes 32-bit data!
- */
-uint32_t _CPU_altivec_ctxt_off = 0;
-uint32_t _CPU_altivec_psim_cpu = 0;
-
-static inline uint32_t
-mfmsr(void)
-{
-uint32_t v;
- _CPU_MSR_GET(v);
- return v;
-}
-
-static inline void
-mtmsr(uint32_t v)
-{
- _CPU_MSR_SET(v);
-}
-
-static inline void
-isync(void)
-{
- asm volatile("isync");
-}
-
-static inline void
-dssall(void)
-{
- if ( !_CPU_altivec_psim_cpu)
- asm volatile("dssall");
-}
-
-static inline uint32_t
-set_MSR_VE(void)
-{
-uint32_t rval;
- rval=mfmsr();
- if ( ! (MSR_VE & rval ) ) {
- mtmsr(rval | MSR_VE);
- isync();
- }
- return rval;
-}
-
-static inline void
-clr_MSR_VE(void)
-{
- dssall();
- mtmsr(mfmsr() & ~MSR_VE);
- isync();
-}
-
-static inline void
-rst_MSR_VE(uint32_t old)
-{
- if ( ! ( MSR_VE & old ) ) {
- dssall();
- mtmsr(old);
- isync();
- }
-}
-
-
-/* Code to probe the compiler's stack alignment (PowerPC);
- * The routine determines at run-time if the compiler generated
- * 8 or 16-byte aligned code.
- *
- * Till Straumann <strauman@slac.stanford.edu>, 2005
- */
-
-static void dummy(void) __attribute__((noinline));
-/* add (empty) asm-statement to make sure this isn't optimized away */
-static void dummy(void) { __asm__ volatile(""); }
-
-static unsigned probe_r1(void) __attribute__((noinline));
-static unsigned probe_r1(void)
-{
-unsigned r1;
- /* call something to enforce creation of a minimal stack frame;
- * (8 bytes: r1 and lr space for 'dummy' callee). If compiled
- * with -meabi -mno-altivec gcc allocates 8 bytes, if -mno-eabi
- * or -maltivec / -mabi=altivec then gcc allocates 16 bytes
- * according to the sysv / altivec ABI specs.
- */
- dummy();
- /* return stack pointer */
- asm volatile("mr %0,1":"=r"(r1));
- return r1;
-}
-
-static unsigned
-probe_ppc_stack_alignment(void)
-{
-unsigned r1;
- asm volatile("mr %0,1":"=r"(r1));
- return (r1 - probe_r1()) & ~ 0xf;
-}
-
-STATIC int check_stack_alignment(void)
-{
-int rval = 0;
-	if ( VEC_ALIGNMENT > PPC_STACK_ALIGNMENT ) {
-		printk(NAM": CPU support has insufficient stack alignment;\n");
-		printk("modify 'cpukit/score/cpu/powerpc/rtems/score/powerpc.h'\n");
-		printk("and choose PPC_ABI_SVR4. I'll enable a workaround for now.\n");
-		rval |= 1;
-	}
-	/* Run-time check; should compile with -mabi=altivec */
-	if ( probe_ppc_stack_alignment() < VEC_ALIGNMENT ) {
-		printk(NAM": run-time stack alignment insufficient; make sure you compile with -mabi=altivec\n");
-		rval |= 2;
-	}
-	return rval;
-}
-
-
-static uint32_t probe_vrsave(_vu32 *p_v) __attribute__((noinline));
-
-/* Check if this code was compiled with -mvrsave=yes or no
- * so that we can set the default/init value accordingly.
- */
-static uint32_t probe_vrsave(_vu32 *p_v)
-{
-_vu32 x;
-uint32_t vrsave;
- /* Explicitly clobber a volatile vector reg (0) that is
- * not used to pass return values.
- * If -mvrsave=yes was used this should cause gcc to
- * set bit 0 in vrsave. OTOH this bit cannot be set
- * because v0 is volatile and not used to pass a value
- * to the caller...
- */
- asm volatile("vxor %0, 0, 0; mfvrsave %1":"=v"(x),"=r"(vrsave)::"v0");
- if ( p_v ) {
- *p_v = x;
- }
- return vrsave;
-}
-
-static int vrsave_yes(void) __attribute__((noinline));
-
-static int vrsave_yes(void)
-{
-uint32_t vrsave_pre;
- asm volatile("mfvrsave %0":"=r"(vrsave_pre));
- if ( (vrsave_pre & 0x80000000) ) {
- printk(NAM": WARNING - unable to determine whether -mvrsave was used; assuming NO\n");
- return 0;
- }
- return probe_vrsave(0) != vrsave_pre;
-}
-
-extern void
-_CPU_altivec_set_vrsave_initval(uint32_t);
-
-
-void
-_CPU_Initialize_altivec(void)
-{
-unsigned pvr;
-
- /* I don't like to have to #define the offset of the altivec area
- * for use by assembly code.
- * Therefore, we compute it here and store it in memory...
- */
- _CPU_altivec_ctxt_off = offsetof(ppc_context, altivec);
-
- /*
- * See ppc_get_context() and PPC_CONTEXT_OFFSET_GPR1
- */
- _CPU_altivec_ctxt_off += PPC_DEFAULT_CACHE_LINE_SIZE;
-
- /*
- * Add space possibly needed for alignment
- */
- _CPU_altivec_ctxt_off += PPC_CACHE_ALIGNMENT - 1;
-
- if ( ! vrsave_yes() ) {
- /* They seemed to compile with -mvrsave=no. Hence we
- * must set VRSAVE so that all registers are saved/restored
- * in case this support was not built with IGNORE_VRSAVE.
- */
- _CPU_altivec_set_vrsave_initval( -1 );
- }
-
- if ( check_stack_alignment() & 2 )
- rtems_fatal_error_occurred(ERRID('V','E','C','1'));
-
- pvr = get_ppc_cpu_type();
- /* psim has altivec but lacks the streaming instructions :-( */
- _CPU_altivec_psim_cpu = (PPC_PSIM == pvr);
-
- if ( ! ppc_cpu_has_altivec() ) {
- printk(NAM": This CPU seems not to have AltiVec\n");
- rtems_panic("Unable to initialize AltiVec Support\n");
- }
-
- if ( ! (mfmsr() & MSR_VE) ) {
- printk(NAM": Warning: BSP should set MSR_VE early; doing it now...\n");
- set_MSR_VE();
- }
-}
-#endif
diff --git a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S b/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
deleted file mode 100644
index 279d1704a7..0000000000
--- a/c/src/lib/libcpu/powerpc/mpc6xx/altivec/vec_sup_asm.S
+++ /dev/null
@@ -1,821 +0,0 @@
-#ifdef __ALTIVEC__
-
-/* Altivec support for RTEMS; vector register context management. */
-
-/*
- * Authorship
- * ----------
- * This software was created by
- * Till Straumann <strauman@slac.stanford.edu>, 2009,
- * Stanford Linear Accelerator Center, Stanford University.
- *
- * Acknowledgement of sponsorship
- * ------------------------------
- * This software was produced by
- * the Stanford Linear Accelerator Center, Stanford University,
- * under Contract DE-AC03-76SFO0515 with the Department of Energy.
- *
- * Government disclaimer of liability
- * ----------------------------------
- * Neither the United States nor the United States Department of Energy,
- * nor any of their employees, makes any warranty, express or implied, or
- * assumes any legal liability or responsibility for the accuracy,
- * completeness, or usefulness of any data, apparatus, product, or process
- * disclosed, or represents that its use would not infringe privately owned
- * rights.
- *
- * Stanford disclaimer of liability
- * --------------------------------
- * Stanford University makes no representations or warranties, express or
- * implied, nor assumes any liability for the use of this software.
- *
- * Stanford disclaimer of copyright
- * --------------------------------
- * Stanford University, owner of the copyright, hereby disclaims its
- * copyright and all other rights in this software. Hence, anyone may
- * freely use it for any purpose without restriction.
- *
- * Maintenance of notices
- * ----------------------
- * In the interest of clarity regarding the origin and status of this
- * SLAC software, this and all the preceding Stanford University notices
- * are to remain affixed to any copy or derivative of this software made
- * or distributed by the recipient and are to be affixed to any copy of
- * software made or distributed by the recipient that contains a copy or
- * derivative of this software.
- *
- * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
- */
-
-
-#include <rtems/powerpc/powerpc.h>
-
-#ifndef PPC_CACHE_ALIGNMENT
-#error "Missing header; PPC_CACHE_ALIGNMENT is not defined"
-#endif
-
-#define ALTIVEC_TESTING
-
-#if PPC_CACHE_ALIGNMENT != 32
-#error "Altivec support assumes cache-line size is 32 bytes!"
-#else
-#undef LD_PPC_CACHE_ALIGNMENT
-#define LD_PPC_CACHE_ALIGNMENT 5
-#endif
-
- .set v0, 0
- .set v8, 8
- .set v16, 16
- .set v20, 20
- .set v24, 24
- .set v28, 28
-
- .set r0, 0
- .set r3, 3
- .set r4, 4
- /* Do not use r5, since this is used by _CPU_Context_switch() */
- .set r6, 6
- .set r7, 7
- .set r8, 8
- .set r9, 9
- .set r10, 10
- .set r11, 11
- /* Do not use r12, since this is used by _CPU_Context_switch() */
-
- .set cr5, 5
-
- .set VECSIZE, 16
-
- .set VRSAVE_INIT_VAL, 0
- .set VSCR_INIT_VAL, 0
-
- .set VRSAVE_OFF, 16
- .set VSCR_OFF, 16+12
-
- .set ds0, 0
-
- /* Block size for dst -- in units of 16-bytes */
- .set BSIZE, 2 /* = 32 bytes */
- .set BCNT, 12/2+1 /* 12 non-volatile registers + area for vscr/vrsave */
- .set BSTRIDE, 32 /* bytes */
-
- .data
-
- .global _CPU_altivec_vrsave_initval
-_CPU_altivec_vrsave_initval:
- .long 0
-
- .global _CPU_altivec_vscr_initval
-_CPU_altivec_vscr_initval:
- .long 0
-
- .text
-
- .extern _CPU_altivec_psim_cpu
- .extern _CPU_altivec_ctxt_off
-
- .macro CMPOFF _B0
- lis \_B0, _CPU_altivec_ctxt_off@ha
- lwz \_B0, _CPU_altivec_ctxt_off@l(\_B0)
- .endm
-
- /* Conditionally load or store a vector _VR to
- * EA(_R1|0 + _R2)
- * If bit _VR (corresponding to _VR) is set in CRC
- * then the load/store is performed but otherwise
- * it is skipped.
- * If compiled with IGNORE_VRSAVE defined then
- * the load/store is done unconditionally.
- *
- * _OPCODE: intended to be lvx, lvxl, stvx or stvxl
- * _VR : target vector register
- * _R1 : base register (NOTE: _R1=r0 uses an
- * implicit ZERO constant, not the contents
- * of r0) for address computation.
- * _R2 : 'offset' register for address computation.
- *
- * MODIFIES: _VR on output if a load operation is performed.
- * IMPLICIT USE: CRC (unless compiled with IGNORE_VRSAVE
- * defined).
- */
- .macro LDST _OPCODE, _VR, _R1, _R2
-#ifndef IGNORE_VRSAVE
- bc 4, \_VR, 111f
-#endif
- \_OPCODE \_VR, \_R1, \_R2
-111:
- .endm
-
- /*
- * Load or store four 'adjacent' vector registers.
- *
- * _OPCODE: intended to be lvx, lvxl, stvx or stvxl
- * _VR : target vector register
- * _R1 : base register (NOTE: _R1=r0 uses an
- * implicit ZERO constant, not the contents
- * of r0) for address computation.
- * _B0 : base register 0
- * _B1 : base register 1
- * _B2 : base register 2
- * _B3 : base register 3
- * _RO : offset register
- *
- * memory addresses for _VR, _VR+1, _VR+2, _VR+3
- * are _B0+_RO, _B1+_RO, _B2+_RO, _B3+_RO, respectively.
- *
- * MODIFIES: _VR, _VR+1, _VR+2, _VR+3 if a load
- * operation is performed.
- * IMPLICIT USE: see LDST
- */
- .macro LDST4 _OPCODE, _VR, _B0, _B1, _B2, _B3, _RO
- LDST _OPCODE=\_OPCODE _VR=\_VR+0 _R1=\_B0 _R2=\_RO
- LDST _OPCODE=\_OPCODE _VR=\_VR+1 _R1=\_B1 _R2=\_RO
- LDST _OPCODE=\_OPCODE _VR=\_VR+2 _R1=\_B2 _R2=\_RO
- LDST _OPCODE=\_OPCODE _VR=\_VR+3 _R1=\_B3 _R2=\_RO
- .endm
-
- /*
- * Preload/zero two cache lines and save 4 vector registers
- * to memory.
- * Note that the cache operation targets memory *past* the
- * current storage area which should hopefully hit when
- * this same code is executed on the next two cache lines...
- *
- * This code effectively does
- * dcbz (_B0 + 64)
- * dcbz (_B0 + 64 + 32)
- * stvx _VR+0, (_B0+ 0)
- * stvx _VR+1, (_B0+16)
- * stvx _VR+2, (_B0+32)
- * stvx _VR+3, (_B0+48)
- *
- * _LRU: may be 'l' or empty. The former variant should be
- * used when it is conceivable that the memory area is
- * unlikely to be used in the near future thus making
- * it a candidate for early eviction from the caches.
- *
- * If it is likely that the memory area is reused soon
- * (e.g., save/restore across ISR execution) then the
- * 'stvx' opcode (w/o 'l' suffix) should be used.
- *
- * _VR: first of four target vector registers; _VR+0,
- * _VR+1, _VR+2, _VR+3 are saved.
- *
- * _B0: base address of memory area.
- * _B1: should contain _B0+16 on entry
- * _B2: should contain _B0+32 on entry
- * _B3: should contain _B0+48 on entry
- *
- * _O1: contains the offset where the four vectors are
- * stored.
- * _VR -> (_B0 + _O1) = (_B0 + _O1 + 0 )
- * _VR+1-> (_B1 + _O1) = (_B0 + _O1 + 16 )
- * _VR+2-> (_B2 + _O1) = (_B0 + _O1 + 32 )
- * _VR+3-> (_B3 + _O1) = (_B0 + _O1 + 48 )
- * _O2: is set to _O1 + 64 by this macro. Hence _O2 is
- * used to address the two cache-lines past the
- * current memory area.
- *
- * MODIFIES: _O2; contains _O1 + 64 after execution of this
- * code.
- *
- * NOTES: a different set of four vectors can be addressed
- * simply by changing the one offset register _O1.
- *
- * Saving more than 4 registers can simply be
- * achieved by expanding this macro multiple
- * times with _O1 and _O2 swapped (new _O1
- * becomes _O2 = old _O1 + 64) thus stepping
- * through the memory area.
- *
- */
- .macro S4VEC_P _LRU, _VR, _B0, _B1, _B2, _B3, _O1, _O2
- addi \_O2, \_O1, 2*PPC_CACHE_ALIGNMENT
- dcbz \_B0, \_O2
- dcbz \_B2, \_O2
- LDST4 _OPCODE=stvx\_LRU _VR=\_VR _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _RO=\_O1
- .endm
-
- /*
- * Save eight vector registers by expanding S4VEC_P twice.
- * See notes for S4VEC_P above.
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be preloaded (see above)
- *
- * MODIFIES: After execution,
- * _O2 contains original _O1 + 64,
- * _O1 contains original _O1 + 128
- *
- * NOTES: Expanding this macro multiple times lets you save
- * multiple blocks of 8 registers (no reload of _Bx / _Ox is needed).
- */
- .macro S8VEC_P _LRU, _VR, _B0, _B1, _B2, _B3, _O1, _O2
- S4VEC_P \_LRU _VR=\_VR+0 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2
- /* Note that the roles of _O1 and _O2 are swapped here */
- S4VEC_P \_LRU _VR=\_VR+4 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O2 _O2=\_O1
- .endm
-
- /*
- * Save volatile vector registers v0..v19 to memory area starting at (_B0 + _O1)
- *
- * See notes above (for S4VEC_P).
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be preloaded (see above)
- * MODIFIES: _O1 contains original _O1 + 256
- * _O2 contains original _O1 + 256 - 64
- */
- .macro S_V0TOV19 _LRU, _B0, _B1, _B2, _B3, _O1, _O2
- S8VEC_P \_LRU _VR=v0 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2
- S8VEC_P \_LRU _VR=v8 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2
- LDST4 stvx\_LRU _VR=v16 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _RO=\_O1
- .endm
-
- /*
- * Save non-volatile vector registers v20..v31 to memory area starting at (_B0 + _O1)
- *
- * See notes above (for S4VEC_P, S_V0TOV19).
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be preloaded (see above)
- * MODIFIES: _O1 contains original _O1 + 128
- * _O2 contains original _O1 + 128 - 64
- */
- .macro S_V20TOV31 _LRU, _B0, _B1, _B2, _B3, _O1, _O2
- S8VEC_P \_LRU _VR=v20 _B0=\_B0 _B1=\_B1 _B2=\_B2 _B3=\_B3 _O1=\_O1 _O2=\_O2
- LDST4 stvx\_LRU v28 \_B0 \_B1 \_B2 \_B3 \_O1
- .endm
-
- /*
- * Save all registers to memory area
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be preloaded (see above)
- * MODIFIES: _O1 contains original _O1 + 512
- * _O2 contains original _O1 + 512 - 64
- */
- .macro S_V0TOV31 _B0, _B1, _B2, _B3, _O1, _O2
- S8VEC_P l v0 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2
- S8VEC_P l v8 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2
- S8VEC_P l v16 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2
- S4VEC_P l v24 \_B0 \_B1 \_B2 \_B3 \_O1 \_O2
- LDST4 stvxl v28 \_B0 \_B1 \_B2 \_B3 \_O2
- .endm
-
-
- /*
- * Macros that expand to 'dcbt _RA, _RB' or nothing, respectively.
- * We can pass either of them as arguments to another macro which
- * allows us to decide if the main macro uses dcbt or not when
- * we expand it...
- */
- .macro DO_DCBT _RA, _RB
- dcbt \_RA, \_RB
- .endm
-
- .macro NO_DCBT _RA, _RB
- .endm
-
- /*
- * NOTE REGARDING dcbt VS dst
- *
- * Preloading the cache with memory areas that we soon need
- * can be done either using 'dcbt' or 'dst' instructions
- * "ahead of time".
- * When experimenting (on a mpc7457) I found that the 'dst'
- * stream instruction was very efficient if there is enough
- * time to read ahead. It works well when we do a context
- * switch:
- *
- * 1) start DST on new context to be loaded
- * 2) save old context to memory
- * 3) load new context from memory
- *
- * Because of the interleaved step 2) dst works nicely and
- * 3) finds what it needs in the cache.
- *
- * However, in a situation when there is not much time
- * to start the DST, e.g., because we want to restore
- * a context out of the blue (e.g., after returning
- * from and ISR):
- * from an ISR):
- * 1) save volatile registers to memory/stack
- * 2) execute ISR
- * 3) might do a task context switch
- * 4) when returned to old task context then
- * reload volatile registers from memory/stack.
- *
- * In this situation, preloading the target memory before
- * or after step 1) makes obviously no sense because after
- * 1) the registers area is most likely in the cache already.
- *
- * Starting preload after 2) doesn't make much sense either.
- * If the ISR doesn't lead to a context switch then it is quite
- * likely that the register area is still in the cache.
- * OTOH, if a context switch happens then the preload after 2)
- * might be useless.
- *
- * This leaves us at step 4) where we want to load immediately.
- * In this case, I found that 'dcbt' works more efficiently
- * so that's what we use when restoring volatile registers.
- *
- * When restoring the non-volatile VRs during a 'normal'
- * context switch then we shall use DST (and no dcbt).
- */
-
- /*
- * Symmetric to S4VEC_P above but addresses loading four
- * vector registers from memory.
- *
- * Touches two cache lines past the current memory area
- * and loads four vectors from the current area.
- *
- * Optionally, the DCBT operation may be omitted
- * (when expanding with _DCBT=NO_DCBT).
- * This is useful if the cache was already preloaded
- * by another means (dst instruction).
- *
- * NOTE: We always use the 'LRU' form of lvx: lvxl,
- * because we deem it unlikely that the context
- * that was just loaded has to be saved again
- * to memory in the immediate future.
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded
- * as explained above.
- *
- * MODIFIES: _O2 contains original _O1 + 64.
- * _VR.._VR+3 loaded from memory.
- */
- .macro L4VEC_A _DCBT, _VR, _B0, _B1, _B2, _B3, _O1, _O2
- addi \_O2, \_O1, 2*PPC_CACHE_ALIGNMENT
- /* preload/touch 2 lines at offset 64 from _B0 */
- \_DCBT \_B0, \_O2
- \_DCBT \_B2, \_O2
-	/* load four vectors at offset 0 from _B0 */
- LDST4 lvxl, \_VR, \_B0, \_B1, \_B2, \_B3, \_O1
- .endm
-
- /*
- * Symmetric to S8VEC_P; loads 8 vector registers
- * from memory -- see comments above...
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded
- * as explained above.
- *
- * MODIFIES: _O1 contains original _O1 + 128.
- * _O2 contains original _O1 + 64.
- * _VR.._VR+7 loaded from memory.
- */
- .macro L8VEC_A _DCBT, _VR, _B0, _B1, _B2, _B3, _O1, _O2
- L4VEC_A \_DCBT, \_VR+0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- L4VEC_A \_DCBT, \_VR+4, \_B0, \_B1, \_B2, \_B3, \_O2, \_O1
- .endm
-
- /*
- * Load volatile vector registers v0..v19 employing
- * the DCBT to preload the cache. The rationale for
- * using DCBT here but not when restoring non-volatile
- * registers is explained above, see
- *
- * "NOTE REGARDING dcbt VS dst"
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded
- * as explained above.
- *
- * MODIFIES: _O1 contains original _O1 + 256.
- * _O2 contains original _O1 + 256 - 64.
- * VR0..VR19 loaded from memory.
- */
- .macro L_V0TOV19 _B0, _B1, _B2, _B3, _O1, _O2
- L8VEC_A DO_DCBT, v0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- L8VEC_A DO_DCBT, v8, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- LDST4 lvxl, v16, \_B0, \_B1, \_B2, \_B3, \_O1
- .endm
-
- /*
- * Load non-volatile vector registers v20..v31.
- * Note that no DCBT is performed since we use
- * DST for preloading the cache during a context
- * switch, see
- *
- * "NOTE REGARDING dcbt VS dst"
- *
- * INPUTS: _B0, _B1, _B2, _B3, _O1 must be loaded
- * as explained above.
- *
- * MODIFIES: _O1 contains original _O1 + 128.
- * _O2 contains original _O1 + 128 - 64.
- * VR20..VR31 loaded from memory.
- */
- .macro L_V20TOV31 _B0, _B1, _B2, _B3, _O1, _O2
- L8VEC_A NO_DCBT, v20, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- LDST4 lvxl, v28, \_B0, \_B1, \_B2, \_B3, \_O1
- .endm
-
- /*
- * Load all registers from memory area.
- */
- .macro L_V0TOV31 _B0, _B1, _B2, _B3, _O1, _O2
- L8VEC_A DO_DCBT, v0, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- L8VEC_A DO_DCBT, v8, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- L8VEC_A DO_DCBT, v16, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- L4VEC_A DO_DCBT, v24, \_B0, \_B1, \_B2, \_B3, \_O1, \_O2
- LDST4 lvxl, v28, \_B0, \_B1, \_B2, \_B3, \_O2
- .endm
-
- /*
- * Compute
- * _B1 = _B0 + 16
- * _B2 = _B0 + 32
- * _B3 = _B0 + 48
- * and load
- * _RO = 0
- *
- * convenience macro to be expanded before
- * any of the load/store macros that use
- * four base addresses etc.
- *
- * INPUT: _B0 = cache-aligned start of memory area
- *
- * MODIFIES: _B1, _B2, _B3, _RO as described above.
- */
- .macro CMP_BASES _B0, _B1, _B2, _B3, _RO
- addi \_B1, \_B0, 1*VECSIZE
- addi \_B2, \_B0, 2*VECSIZE
- addi \_B3, \_B0, 3*VECSIZE
- li \_RO, 0
- .endm
-
- /*
- * Prepare for saving general vector registers.
- *
- * If not built with #define IGNORE_VRSAVE then
- *
- * 1) copy vrsave to CRC
- *
- * endif
- *
- * 2) copy vrsave to _VRSAVE_REG
- * 3) preload/zero cache line where vrsave and vscr are stored.
- * 4) compute base addresses from _B0
- * 5) preload/zero first two cache lines (remember that the
- * first S8VEC_P starts preloading/zeroing at offset 64).
- *
- * INPUT: 'vrsave' register, _B0 (base address of memory area)
- * MODIFIES: _VRSAVE_REG (holds contents of 'vrsave')
- * _B0 = original _B0 + 32,
- * _B1 = original _B0 + 32 + 16,
- * _B2 = original _B0 + 32 + 32,
- * _B3 = original _B0 + 32 + 48,
- * CRC = 'vrsave' (ONLY IF COMPILED with IGNORE_VRSAVE undefined)
- */
- .macro PREP_FOR_SAVE _VRSAVE_REG, _B0, _B1, _B2, _B3, _RO
- mfvrsave \_VRSAVE_REG
-#ifndef IGNORE_VRSAVE
- mtcr \_VRSAVE_REG
-#endif
- dcbz 0, \_B0
- addi \_B0, \_B0, PPC_CACHE_ALIGNMENT
- dcbz 0, \_B0
- CMP_BASES \_B0, \_B1, \_B2, \_B3, \_RO
- dcbz 0, \_B2
- .endm
-
- /*
- * Store _VRSAVE_REG and _VSCR_VREG to memory. These registers
- * must have been loaded from 'vrsave' and 'vscr', respectively,
- * prior to expanding this macro.
- *
- * INPUTS: _VRSAVE_REG GPR holding 'vrsave' contents
- * _VSCR_VREG VR holding 'vscr' contents
- * _B0 cache-aligned (base) address of memory area.
- * MODIFIES: _SCRATCH_REG
- */
- .macro S_VSCR_VRSAVE _VRSAVE_REG, _VSCR_VREG, _B0, _SCRATCH_REG
- stw \_VRSAVE_REG, - PPC_CACHE_ALIGNMENT + VRSAVE_OFF(\_B0)
- li \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VSCR_OFF
- stvewx \_VSCR_VREG, \_B0, \_SCRATCH_REG
- .endm
-
- /*
- * Load 'vrsave' and 'vscr' from memory.
- *
- * INPUTS: _B0 cache-aligned (base) address of memory area.
- * MODIFIES: _SCRATCH_REG (gpr), _SCRATCH_VREG (vr)
- * 'vscr', 'vrsave'.
- * CRC (holds contents of 'vrsave') (ONLY IF COMPILED
- * with IGNORE_VRSAVE undefined).
- */
- .macro L_VSCR_VRSAVE _B0, _SCRATCH_REG, _SCRATCH_VREG
- lwz \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VRSAVE_OFF(\_B0)
- mtvrsave \_SCRATCH_REG
-#ifndef IGNORE_VRSAVE
- mtcr \_SCRATCH_REG
-#endif
- li \_SCRATCH_REG, - PPC_CACHE_ALIGNMENT + VSCR_OFF
- lvewx \_SCRATCH_VREG, \_B0, \_SCRATCH_REG
- mtvscr \_SCRATCH_VREG
- .endm
-
- /*
- * _B0 &= ~ (PPC_CACHE_ALIGNMENT - 1)
- *
- * INPUT: _B0
- * MODIFIES: _B0 (as stated above)
- */
- .macro CACHE_DOWNALGN _B0
- rlwinm \_B0, \_B0, 0, 0, 31-LD_PPC_CACHE_ALIGNMENT
- .endm
-
- .text
-
- .global _CPU_save_altivec_volatile
-_CPU_save_altivec_volatile:
- /* Align address up to next cache-line boundary */
- addi r3, r3, PPC_CACHE_ALIGNMENT - 1
- CACHE_DOWNALGN r3
-
-#ifndef IGNORE_VRSAVE
- /* Save CRC -- it is used implicitly by all the LOAD/STORE macros
- * when testing if we really should do the load/store operation.
- */
- mfcr r9
-#endif
-
- PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
- /* r0 now contains VRSAVE, r3 still the aligned memory area
- * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
- * respectively. r10 holds zero
- */
- S_V0TOV19 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
- mfvscr v0
- /* Store vrsave (still in r0) and vscr (in v0) to memory area */
- S_VSCR_VRSAVE r0, v0, r3, r11
-
-#ifndef IGNORE_VRSAVE
- /* Restore CRC */
- mtcr r9
-#endif
- blr
-
- .global _CPU_load_altivec_volatile
-_CPU_load_altivec_volatile:
- /* Align address up to next cache-line boundary */
- addi r3, r3, PPC_CACHE_ALIGNMENT - 1
- CACHE_DOWNALGN r3
-#ifndef IGNORE_VRSAVE
- /* Save CRC -- it is used implicitly by all the LOAD/STORE macros
- * when testing if we really should do the load/store operation.
- */
- mfcr r9
-#endif
-
- /* Try to preload 1st line (where vscr and vrsave are stored) */
- dcbt 0, r3
- /* Point to start of general vector-register area */
- addi r3, r3, PPC_CACHE_ALIGNMENT
- /* Start preloading 2nd line (where first two vectors are) */
- dcbt 0, r3
- L_VSCR_VRSAVE r3, r0, v0
- CMP_BASES r3, r4, r8, r6, r10
- /* Start preloading 3rd line (where vectors 3 and 4 are) */
- dcbt 0, r8
- L_V0TOV19 r3, r4, r8, r6, r10, r11
-
-#ifndef IGNORE_VRSAVE
- mtcr r9
-#endif
- blr
-
- .global _CPU_Context_switch_altivec
-_CPU_Context_switch_altivec:
-
- /* fetch offset of altivec area in context */
- CMPOFF r8
- /* down-align 'to' area to cache-line boundary */
- add r4, r4, r8
- CACHE_DOWNALGN r4
-
- /* Check for PSIM */
- lis r6, _CPU_altivec_psim_cpu@ha
- lwz r6, _CPU_altivec_psim_cpu@l(r6)
- cmpli 0, r6, 0
- bne 1f
- /* Skip data-stream instructions on PSIM (not implemented) */
- dssall
- /* Pre-load new context into cache */
- lis r6, (BSIZE<<(24-16)) | (BCNT<<(16-16))
- ori r6, r6, BSTRIDE
- dstt r4, r6, ds0
-1:
-
-#ifndef IGNORE_VRSAVE
- /* Save CRC -- it is used implicitly by all the LOAD/STORE macros
- * when testing if we really should do the load/store operation.
- */
- mfcr r9
-#endif
-
- /* Is 'from' context == NULL ? (then we just do a 'restore') */
- cmpli 0, r3, 0
- beq 1f /* yes: skip saving 'from' context */
-
- /* SAVE NON-VOLATILE REGISTERS */
-
- /* Compute aligned destination pointer (r8 still holds offset
- * to 'altivec' area in context)
- */
- add r3, r3, r8
- CACHE_DOWNALGN r3
-
- PREP_FOR_SAVE r0, r3, r8, r6, r7, r10
- /* The manual says reading vscr can take some time - do
- * read it here (into a volatile vector register) while
- * we wait for cache blocks to be allocated
- */
- mfvscr v0
- S_V20TOV31 _LRU=l, _B0=r3, _B1=r8, _B2=r6, _B3=r7, _O1=r10, _O2=r11
- /* vrsave is now in r0 (PREP_FOR_SAVE), vscr in v0 */
- S_VSCR_VRSAVE r0, v0, r3, r8
-
-1:
-
- /* LOAD NON-VOLATILE REGISTERS */
-
- /* Advance past vrsave/vscr area */
- addi r4, r4, PPC_CACHE_ALIGNMENT
- L_VSCR_VRSAVE r4, r0, v0
- CMP_BASES r4, r8, r6, r7, r10
- L_V20TOV31 r4, r8, r6, r7, r10, r11
-
-#ifndef IGNORE_VRSAVE
- mtcr r9
-#endif
- blr
-
- .global _CPU_Context_initialize_altivec
-_CPU_Context_initialize_altivec:
- CMPOFF r8
- add r3, r3, r8
- CACHE_DOWNALGN r3
- lis r8, _CPU_altivec_vrsave_initval@ha
- lwz r8, _CPU_altivec_vrsave_initval@l(r8)
- stw r8, VRSAVE_OFF(r3)
- lis r6, _CPU_altivec_vscr_initval@ha
- lwz r6, _CPU_altivec_vscr_initval@l(r6)
- stw r6, VSCR_OFF(r3)
- blr
-
- /*
- * Change the initial value of VRSAVE.
- * Can be used by initialization code if
- * it is determined that code was compiled
- * with -mvrsave=no. In this case, VRSAVE
- * must be set to all-ones which causes this
- * support code to save/restore *all* registers
- * (only has an effect if IGNORE_VRSAVE is
- * not defined -- otherwise all registers are
- * saved/restored anyways).
- */
- .global _CPU_altivec_set_vrsave_initval
-_CPU_altivec_set_vrsave_initval:
- lis r8, _CPU_altivec_vrsave_initval@ha
- stw r3, _CPU_altivec_vrsave_initval@l(r8)
- mtvrsave r3
- blr
-
-#ifdef ALTIVEC_TESTING
- .global msr_VE_on
-msr_VE_on:
- mfmsr r3
- oris r3, r3, 1<<(31-6-16)
- mtmsr r3
- blr
-
- .global msr_VE_off
-msr_VE_off:
- mfmsr r3
- lis r4, 1<<(31-6-16)
- andc r3, r3, r4
- mtmsr r3
- blr
-
-
- .global mfvrsave
-mfvrsave:
- mfvrsave r3
- blr
-
- .global mtvrsave
-mtvrsave:
- mtvrsave r3
- blr
-
- /* Load all vector registers from memory area.
- * NOTE: This routine is not strictly ABI compliant --
- * it guarantees that volatile vector registers
- * have certain values on exit!
- */
- .global _CPU_altivec_load_all
-_CPU_altivec_load_all:
- /* Align address up to next cache-line boundary */
- addi r3, r3, PPC_CACHE_ALIGNMENT - 1
- CACHE_DOWNALGN r3
-#ifndef IGNORE_VRSAVE
- /* Save CRC -- it is used implicitly by all the LOAD/STORE macros
- * when testing if we really should do the load/store operation.
- */
- mfcr r9
-#endif
-
- /* Try to preload 1st line (where vscr and vrsave are stored) */
- dcbt 0, r3
- /* Point to start of general vector-register area */
- addi r3, r3, PPC_CACHE_ALIGNMENT
- /* Start preloading 2nd line (where first two vectors are) */
- dcbt 0, r3
- L_VSCR_VRSAVE r3, r0, v0
- CMP_BASES r3, r4, r8, r6, r10
- /* Start preloading 3rd line (where vectors 3 and 4 are) */
- dcbt 0, r8
- L_V0TOV31 r3, r4, r8, r6, r10, r11
-
-#ifndef IGNORE_VRSAVE
- mtcr r9
-#endif
- blr
-
- .global _CPU_altivec_save_all
-_CPU_altivec_save_all:
- /* Align address up to next cache-line boundary */
- addi r3, r3, PPC_CACHE_ALIGNMENT - 1
- CACHE_DOWNALGN r3
-
-#ifndef IGNORE_VRSAVE
- /* Save CRC -- it is used implicitly by all the LOAD/STORE macros
- * when testing if we really should do the load/store operation.
- */
- mfcr r9
-#endif
-
- PREP_FOR_SAVE r0, r3, r4, r8, r6, r10
- /* r0 now contains VRSAVE, r3 still the aligned memory area
- * and r4, r8, r6 are offset by 16, 32, and 48 bytes from r3,
- * respectively. r10 holds zero
- */
- S_V0TOV31 _B0=r3, _B1=r4, _B2=r8, _B3=r6, _O1=r10, _O2=r11
- mfvscr v0
- /* Store vrsave (still in r0) and vscr (in v0) to memory area */
- S_VSCR_VRSAVE r0, v0, r3, r11
-
-#ifndef IGNORE_VRSAVE
- /* Restore CRC */
- mtcr r9
-#endif
- blr
-
-
-#if 0
- .gnu_attribute 4,1
- .gnu_attribute 8,1
-#endif
-
-#endif
-#endif