summaryrefslogtreecommitdiffstats
path: root/cpukit/score/cpu/i386/sse_test.c
diff options
context:
space:
mode:
Diffstat (limited to 'cpukit/score/cpu/i386/sse_test.c')
-rw-r--r--cpukit/score/cpu/i386/sse_test.c954
1 files changed, 954 insertions, 0 deletions
diff --git a/cpukit/score/cpu/i386/sse_test.c b/cpukit/score/cpu/i386/sse_test.c
new file mode 100644
index 0000000000..8b1cbd2658
--- /dev/null
+++ b/cpukit/score/cpu/i386/sse_test.c
@@ -0,0 +1,954 @@
+/* $Id$ */
+
+/*
+ * Authorship
+ * ----------
+ * This software was created by
+ * Till Straumann <strauman@slac.stanford.edu>, 2009,
+ * Stanford Linear Accelerator Center, Stanford University.
+ *
+ * Acknowledgement of sponsorship
+ * ------------------------------
+ * This software was produced by
+ * the Stanford Linear Accelerator Center, Stanford University,
+ * under Contract DE-AC03-76SFO0515 with the Department of Energy.
+ *
+ * Government disclaimer of liability
+ * ----------------------------------
+ * Neither the United States nor the United States Department of Energy,
+ * nor any of their employees, makes any warranty, express or implied, or
+ * assumes any legal liability or responsibility for the accuracy,
+ * completeness, or usefulness of any data, apparatus, product, or process
+ * disclosed, or represents that its use would not infringe privately owned
+ * rights.
+ *
+ * Stanford disclaimer of liability
+ * --------------------------------
+ * Stanford University makes no representations or warranties, express or
+ * implied, nor assumes any liability for the use of this software.
+ *
+ * Stanford disclaimer of copyright
+ * --------------------------------
+ * Stanford University, owner of the copyright, hereby disclaims its
+ * copyright and all other rights in this software. Hence, anyone may
+ * freely use it for any purpose without restriction.
+ *
+ * Maintenance of notices
+ * ----------------------
+ * In the interest of clarity regarding the origin and status of this
+ * SLAC software, this and all the preceding Stanford University notices
+ * are to remain affixed to any copy or derivative of this software made
+ * or distributed by the recipient and are to be affixed to any copy of
+ * software made or distributed by the recipient that contains a copy or
+ * derivative of this software.
+ *
+ * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03
+ */
+
+
+/* Code for testing FPU/SSE context save/restore across exceptions
+ * (including interrupts).
+ *
+ * There are two tasks and an IRQ/EXC handler involved. One task (LP)
+ * is of lower priority than the other (HP) task.
+ *
+ * 1) LP task sets up a context area in memory (known contents; every
+ * register is loaded with different values)
+ *
+ * 2) LP task
+ * 2a saves original FP/SSE context
+ * 2b loads context from 1) into FPU/SSE
+ * 2c raises an exception or interrupt
+ *
+ * * (2d save FPU/SSE context after irq/exception returns to
+ * separate area for verification
+ * 2e reload original FP/SSE context.)
+ *
+ * * All these five steps are coded in assembly to prevent
+ * gcc from manipulating the FP/SSE state. The last two steps,
+ * however, are effectively executed during 6 when control is
+ * returned to the LP task.
+ *
+ * 3) IRQ/EXC handler OS wrapper saves context, initializes FPU and
+ * MXCSR.
+ *
+ * 4) user (our) irq/exc handler clears exception condition, clobbers
+ * FPU and XMM regs and finally releases a semaphore on which HP
+ * task is waiting.
+ *
+ * 5) context switch to HP task. HP task clobbers FPU and XMM regs.
+ * Then it tries to re-acquire the synchronization semaphore and
+ * blocks.
+ *
+ * 6) task switch back to (interrupted) LP task. Original context is
+ * restored and verified against the context that was setup in 1).
+ *
+ *
+ * Three methods for interrupting the LP task are tested
+ *
+ * a) FP exception (by setting an exception status in the context from step 1)
+ * b) SSE exception (by computing the square root of a vector of negative
+ * numbers)
+ * c) IRQ (software IRQ via 'INT xx' instruction)
+ *
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#ifdef __rtems__
+#include <rtems.h>
+#include <rtems/score/cpu.h>
+#include <rtems/irq.h>
+#include <rtems/error.h>
+#endif
+
+#include <inttypes.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+
+/* This is currently hardcoded (int xx opcode requires immediate operand).
+ * The value is used as the IRQ name when the test ISR is installed and is
+ * added to 32 to form the operand of the 'int' instruction (see INTRAISE).
+ */
+#define SSE_TEST_IRQ 10
+
+/* GCC vector types of 16 bytes each (the size of one XMM register image in
+ * the context area): 16 x uint8_t, 4 x uint32_t and 4 x float lanes.
+ */
+typedef uint8_t __v8 __attribute__((vector_size(16)));
+typedef uint32_t __v32 __attribute__((vector_size(16)));
+typedef float __vf __attribute__((vector_size(16)));
+
+#ifndef __rtems__
+/* Clone of what is defined in rtems/score/cpu.h (for testing under linux).
+ *
+ * This is the memory image that the code in this file hands to the
+ * 'fxsave' and 'fxrstor' instructions. The members add up to 512 bytes
+ * (32 bytes of header + 8*16 bytes FP registers + 8*16 bytes XMM registers
+ * + 224 reserved bytes) and the type is declared 16-byte aligned.
+ */
+typedef struct Context_Control_sse {
+ uint16_t fcw; /* FPU control word (FPCW_xxx bits) */
+ uint16_t fsw; /* FPU status word (FPSR_xxx bits) */
+ uint8_t ftw; /* FP register tags, one bit per register; exc_raise() sets a bit to mark the register 'valid' */
+ uint8_t res_1; /* reserved */
+ uint16_t fop; /* presumably the last FPU opcode -- confirm against the FXSAVE layout */
+ uint32_t fpu_ip; /* presumably the last FPU instruction pointer -- confirm */
+ uint16_t cs; /* presumably the code segment belonging to fpu_ip -- confirm */
+ uint16_t res_2; /* reserved */
+ uint32_t fpu_dp; /* presumably the last FPU data (operand) pointer -- confirm */
+ uint16_t ds; /* presumably the data segment belonging to fpu_dp -- confirm */
+ uint16_t res_3; /* reserved */
+ uint32_t mxcsr; /* SSE control/status register (MXCSR_xxx bits) */
+ uint32_t mxcsr_mask; /* presumably the mask of valid MXCSR bits -- confirm */
+ struct {
+ uint8_t fpreg[10]; /* 80-bit register image (see fp_st1()/fp_ld1()) */
+ uint8_t res_4[ 6]; /* pads each slot to 16 bytes */
+ } fp_mmregs[8];
+ uint8_t xmmregs[8][16]; /* XMM0..XMM7, 16 bytes each */
+ uint8_t res_5[224]; /* reserved; pads the structure to 512 bytes */
+} Context_Control_sse
+__attribute__((aligned(16)))
+;
+#endif
+
+/* Bits of the SSE control/status register (applied to the 'mxcsr' member
+ * of the context and to values read/written with stmxcsr/ldmxcsr).
+ */
+#define MXCSR_FZ (1<<15) /* Flush to zero */
+#define MXCSR_RC(x) (((x)&3)<<13) /* Rounding ctrl */
+#define MXCSR_PM (1<<12) /* Precision msk */
+#define MXCSR_UM (1<<11) /* Underflow msk */
+#define MXCSR_OM (1<<10) /* Overflow msk */
+#define MXCSR_ZM (1<< 9) /* Divbyzero msk */
+#define MXCSR_DM (1<< 8) /* Denormal msk */
+#define MXCSR_IM (1<< 7) /* Invalidop msk */
+#define MXCSR_DAZ (1<< 6) /* Denorml are 0 */
+#define MXCSR_PE (1<< 5) /* Precision flg */
+#define MXCSR_UE (1<< 4) /* Underflow flg */
+#define MXCSR_OE (1<< 3) /* Overflow flg */
+#define MXCSR_ZE (1<< 2) /* Divbyzero flg */
+#define MXCSR_DE (1<< 1) /* Denormal flg */
+#define MXCSR_IE (1<< 0) /* Invalidop flg */
+
+/* All MXCSR exception mask bits / all MXCSR exception flag bits */
+#define MXCSR_ALLM (MXCSR_PM | MXCSR_UM | MXCSR_OM | MXCSR_ZM | MXCSR_DM | MXCSR_IM)
+#define MXCSR_ALLE (MXCSR_PE | MXCSR_UE | MXCSR_OE | MXCSR_ZE | MXCSR_DE | MXCSR_IE)
+
+/* Bits of the FPU status word (applied to the 'fsw' member of the context) */
+#define FPSR_B (1<<15) /* FPU busy */
+#define FPSR_C3 (1<<14) /* Cond code C3 */
+#define FPSR_TOP(x) (((x)&7)<<11) /* TOP */
+#define FPSR_C2 (1<<10) /* Cond code C2 */
+#define FPSR_C1 (1<< 9) /* Cond code C1 */
+#define FPSR_C0 (1<< 8) /* Cond code C0 */
+#define FPSR_ES (1<< 7) /* Error summary */
+#define FPSR_SF (1<< 6) /* Stack fault */
+#define FPSR_PE (1<< 5) /* Precision flg */
+#define FPSR_UE (1<< 4) /* Underflow flg */
+#define FPSR_OE (1<< 3) /* Overflow flg */
+#define FPSR_ZE (1<< 2) /* Divbyzero flg */
+#define FPSR_DE (1<< 1) /* Denormal flg */
+#define FPSR_IE (1<< 0) /* Invalidop flg */
+
+/* Bits of the FPU control word (applied to the 'fcw' member of the context) */
+#define FPCW_X (1<<12) /* Infinity ctrl */
+#define FPCW_RC(x) (((x)&3)<<10) /* Rounding ctrl */
+#define FPCW_PC(x) (((x)&3)<< 8) /* Precision ctl */
+#define FPCW_PM (1<< 5) /* Precision msk */
+#define FPCW_UM (1<< 4) /* Underflow msk */
+#define FPCW_OM (1<< 3) /* Overflow msk */
+#define FPCW_ZM (1<< 2) /* Divbyzero msk */
+#define FPCW_DM (1<< 1) /* Denormal msk */
+#define FPCW_IM (1<< 0) /* Invalidop msk */
+
+/* All FPU exception mask bits (control word) and all FPU exception flag
+ * bits including error summary and stack fault (status word).
+ */
+#define FPCW_ALLM (FPCW_PM | FPCW_UM | FPCW_OM | FPCW_ZM | FPCW_DM | FPCW_IM)
+#define FPSR_ALLE (FPSR_ES | FPSR_SF | FPSR_PE | FPSR_UE | FPSR_OE | FPSR_ZE | FPSR_DE | FPSR_IE)
+
+/* Store 'double' into 80-bit register image.
+ *
+ * p_dst: points to the 10-byte array that receives the value.
+ * v:     value to store. The "t" constraint places it on the top of the
+ *        x87 register stack; 'fstpt' stores it and pops the stack, which
+ *        is why "st" is also listed as clobbered.
+ */
+void
+fp_st1(uint8_t (*p_dst)[10], double v)
+{
+ asm volatile("fstpt %0":"=m"(*p_dst):"t"(v):"st");
+}
+
+/* Write 'v' in 80-bit format to FP register slot #i of the context area.
+ *
+ * p_ctxt: context whose fp_mmregs[] array is modified
+ * i:      slot index; not range-checked here (the array has 8 slots)
+ * v:      value to store
+ */
+void
+fp_st(Context_Control_sse *p_ctxt, int i, double v)
+{
+  uint8_t (*slot)[10] = &p_ctxt->fp_mmregs[i].fpreg;
+
+  fp_st1(slot, v);
+}
+
+/* Load 'double' from 80-bit register image.
+ *
+ * p_src: points to the 10-byte array holding the image.
+ *
+ * 'fldt' pushes the value onto the x87 register stack where the "=t"
+ * output constraint picks it up. Only operand %1 (the first byte of the
+ * array) appears in the template; the second memory operand (the whole
+ * array) presumably just tells the compiler that all 10 bytes are read.
+ *
+ * RETURNS: the value converted to 'double'.
+ */
+double
+fp_ld1(uint8_t (*p_src)[10])
+{
+double v;
+
+ asm volatile("fldt %1":"=t"(v):"m"((*p_src)[0]),"m"(*p_src));
+ return v;
+}
+
+/* Read the 80-bit image in FP register slot #i of the context area.
+ *
+ * p_ctxt: context whose fp_mmregs[] array is read
+ * i:      slot index; not range-checked here (the array has 8 slots)
+ *
+ * RETURNS: the stored value converted to 'double'.
+ */
+double
+fp_ld(Context_Control_sse *p_ctxt, int i)
+{
+  uint8_t (*slot)[10] = &p_ctxt->fp_mmregs[i].fpreg;
+  double  value       = fp_ld1(slot);
+
+  return value;
+}
+
+/* Clobber list for inline assembly that may modify the x87 state: all
+ * eight stack registers plus the FPU status and control registers.
+ */
+#define FPUCLOBBER \
+ "st","st(1)","st(2)","st(3)", \
+ "st(4)","st(5)","st(6)","st(7)",\
+ "fpsr","fpcr"
+
+/* There seems to be no way to say that mxcsr was clobbered */
+
+/* Clobber list for inline assembly that modifies XMM0..XMM7 */
+#define SSECLOBBER \
+ "xmm0","xmm1","xmm2","xmm3", \
+ "xmm4","xmm5","xmm6","xmm7"
+
+/* Overwrite XMM0..XMM7: each 32-bit lane of every register is set to 'x' */
+static void
+sse_clobber(uint32_t x)
+{
+  /* 16-byte pattern holding 'x' in all four lanes */
+  __v32 fill = { x, x, x, x };
+
+  /* load the pattern from memory once, then copy it register to register */
+  asm volatile (
+    " movdqa %0, %%xmm0 \n"
+    " movdqa %%xmm0, %%xmm1 \n"
+    " movdqa %%xmm0, %%xmm2 \n"
+    " movdqa %%xmm0, %%xmm3 \n"
+    " movdqa %%xmm0, %%xmm4 \n"
+    " movdqa %%xmm0, %%xmm5 \n"
+    " movdqa %%xmm0, %%xmm6 \n"
+    " movdqa %%xmm0, %%xmm7 \n"
+    : /* no outputs */
+    : "m"(fill)
+    : SSECLOBBER
+  );
+}
+
+/* Reset the FPU ('finit') and overwrite XMM0..XMM7 with a pattern built
+ * from the two arguments: each register ends up holding v1, v2, v1, v2
+ * (lowest lane first).
+ *
+ * Implemented as a bare assembly routine so that the compiler cannot
+ * touch the FP/SSE state. On entry 0(%esp) is the return address and the
+ * arguments start at 4(%esp) (same convention as used by init_ctxt), so
+ * the 64-bit load below picks up v1 and v2.
+ */
+void
+all_clobber(uint32_t v1, uint32_t v2);
+
+__asm__ (
+"all_clobber: \n"
+" finit \n"
+" movq 4(%esp), %xmm0 \n"
+" punpcklqdq %xmm0, %xmm0 \n"
+" movdqa %xmm0, %xmm1 \n"
+" movdqa %xmm0, %xmm2 \n"
+" movdqa %xmm0, %xmm3 \n"
+" movdqa %xmm0, %xmm4 \n"
+" movdqa %xmm0, %xmm5 \n"
+" movdqa %xmm0, %xmm6 \n"
+" movdqa %xmm0, %xmm7 \n"
+" ret \n"
+);
+
+/* Clear FPU and save FPU/SSE registers to context area.
+ *
+ * p_ctxt: destination of the 'fxsave' image.
+ *
+ * Implemented as a bare assembly routine: 'finit' resets the FPU, then the
+ * first argument is fetched from 4(%esp) into EAX and used as the address
+ * operand of 'fxsave'.
+ * NOTE(review): 'fxsave' presumably requires a 16-byte aligned area, which
+ * the alignment attribute of Context_Control_sse would provide -- confirm.
+ */
+
+void
+init_ctxt(Context_Control_sse *p_ctxt);
+
+__asm__ (
+"init_ctxt: \n"
+" finit \n"
+" mov 4(%esp), %eax\n"
+" fxsave (%eax) \n"
+" fwait \n"
+" ret \n"
+);
+
+/* Save FPU/SSE registers to context area.
+ *
+ * p_ctxt: destination; it is zeroed first and then written by 'fxsave'.
+ *         Zeroing presumably gives a defined value to any bytes that
+ *         'fxsave' does not write, so that saved areas can be compared.
+ *
+ * Unlike init_ctxt() the FPU is not reset first (the 'finit' is commented
+ * out), i.e., the current state is captured.
+ */
+
+static void
+stor_ctxt(Context_Control_sse *p_ctxt)
+{
+ memset(p_ctxt, 0, sizeof(*p_ctxt));
+ asm volatile(
+/* " finit \n" */
+ " fxsave %0 \n"
+ " fwait \n"
+ : "=m"(*p_ctxt)
+ :
+ : FPUCLOBBER
+ );
+}
+
+/* printf() format fragments for printing 8, 16 and 32-bit values as
+ * zero-padded hex numbers with a '0x' prefix.
+ */
+#define H08 "0x%02"PRIx8
+#define H16 "0x%04"PRIx16
+#define H32 "0x%08"PRIx32
+
+/* Complete 'mismatch' message for a pair of 16-bit values */
+#define F16 "mismatch ("H16" != "H16")\n"
+
+/* Compare member 'fld' of the two contexts 'a' and 'b'; on a mismatch set
+ * 'rval' to 1 and, unless 'quiet' is nonzero, print both values to stderr
+ * using the printf format fragment 'fmt'.
+ *
+ * The macro relies on 'a', 'b', 'rval' and 'quiet' being in scope at the
+ * point of use. It is wrapped in do { } while (0) so that it behaves like
+ * a single statement (no dangling-else surprises); terminate it with ';'.
+ */
+#define FLDCMP(fld, fmt) \
+  do { \
+    if ( a->fld != b->fld ) { \
+      rval = 1; \
+      if ( !quiet ) \
+        fprintf(stderr,#fld" mismatch ("fmt" != "fmt")\n",a->fld, b->fld); \
+    } \
+  } while (0)
+
+/* Compare FP register #i of the two contexts 'a' and 'b'.
+ *
+ * A mismatch is flagged (rval = 1) if the tag bits of register #i differ
+ * or if both tag bits are set and the 10-byte register images differ;
+ * registers whose tag bit is clear in both contexts are not compared.
+ * Unless 'quiet' is nonzero the tags (on a tag mismatch) or the register
+ * values converted to 'double' are printed to stderr.
+ *
+ * Relies on 'a', 'b', 'rval' and 'quiet' being in scope; 'i' is evaluated
+ * several times.
+ */
+#define FLTCMP(i) \
+ do { \
+ if ( ( (a->ftw ^ b->ftw) & (1<<i)) \
+ || ( (a->ftw & b->ftw & (1<<i)) && \
+ memcmp(a->fp_mmregs[i].fpreg, \
+ b->fp_mmregs[i].fpreg, \
+ sizeof(a->fp_mmregs[i].fpreg)) \
+ ) \
+ ) { \
+ rval = 1; \
+ if ( !quiet ) { \
+ double fa = fp_ld(a, i); \
+ double fb = fp_ld(b, i); \
+ if ( ((a->ftw ^ b->ftw) & (1<<i)) ) \
+ fprintf(stderr,"fpreg[%u] TAG mismatch (%u != %u)\n",i,(a->ftw & (1<<i)) ? 1 : 0,(b->ftw & (1<<i)) ? 1 : 0); \
+ else \
+ fprintf(stderr,"fpreg[%u] mismatch (%g != %g)\n",i,fa,fb); \
+ } \
+ } \
+ } while (0)
+
+/* Compare XMM register #i (16 bytes) of the two contexts 'a' and 'b'.
+ *
+ * On a mismatch 'rval' is set to 1 and, unless 'quiet' is nonzero, both
+ * register images are dumped to stderr as 16 hex bytes each.
+ *
+ * Relies on 'a', 'b', 'rval' and 'quiet' being in scope; 'i' is evaluated
+ * several times.
+ */
+#define XMMCMP(i) \
+ do { \
+ if ( memcmp(&a->xmmregs[i], \
+ &b->xmmregs[i], \
+ sizeof(a->xmmregs[i])) \
+ ) { \
+ rval = 1; \
+ if ( !quiet ) { \
+ int _jj; \
+ fprintf(stderr,"xmmreg[%u] mismatch:\n", i); \
+ fprintf(stderr," "); \
+ for (_jj=0; _jj<16; _jj++) \
+ fprintf(stderr,"%02x ",a->xmmregs[i][_jj]); \
+ fprintf(stderr,"\n !=\n"); \
+ fprintf(stderr," "); \
+ for (_jj=0; _jj<16; _jj++) \
+ fprintf(stderr,"%02x ",b->xmmregs[i][_jj]); \
+ fprintf(stderr,"\n"); \
+ } \
+ } \
+ } while (0)
+
+
+/* Check two FPU/SSE context areas for equality.
+ *
+ * a, b:  the context areas to compare
+ * quiet: if nonzero, differences are not reported on stderr
+ *
+ * The control/status members are compared first, then the eight FP
+ * registers and finally the eight XMM registers.
+ *
+ * RETURNS: zero if the contexts match and nonzero otherwise
+ */
+static int
+cmp_ctxt(Context_Control_sse *a, Context_Control_sse *b, int quiet)
+{
+  /* NOTE: the comparison macros refer to 'a', 'b', 'quiet' and 'rval' by name */
+  int rval = 0;
+  int reg;
+
+  FLDCMP(fcw,        H16);
+  FLDCMP(fsw,        H16);
+  FLDCMP(ftw,        H08);
+  FLDCMP(fop,        H16);
+  FLDCMP(fpu_ip,     H32);
+  FLDCMP(cs,         H16);
+  FLDCMP(fpu_dp,     H32);
+  FLDCMP(ds,         H16);
+  FLDCMP(mxcsr,      H32);
+  FLDCMP(mxcsr_mask, H32);
+
+  /* FP registers */
+  reg = 0;
+  while ( reg < 8 ) {
+    FLTCMP(reg);
+    reg++;
+  }
+
+  /* XMM registers */
+  reg = 0;
+  while ( reg < 8 ) {
+    XMMCMP(reg);
+    reg++;
+  }
+
+  return rval;
+}
+
+/* Possible arguments to exc_raise(); do_raise only looks at the sign of
+ * the value (zero, positive or negative).
+ */
+
+#define FP_EXC 0
+#define IRQ_EXC 1
+#define SSE_EXC -1
+
+/* Check stack alignment by raising the interrupt from a
+ * non-16-byte aligned section of code. The exception/IRQ
+ * handler must align the stack and SSE context area
+ * properly or it will crash.
+ *
+ * INTRAISE(x) yields the assembly text 'int $32+<x>'; the two macro levels
+ * make sure that 'x' is macro-expanded before it is turned into a string.
+ */
+#define __INTRAISE(x) " int $32+"#x" \n"
+#define INTRAISE(x) __INTRAISE(x)
+
+/* do_raise: expects the 'kind' of exception in EAX (exc_raise() passes it
+ * there) and, after an initial 'fwait',
+ *  - returns immediately if EAX is zero (FP_EXC); presumably the 'fwait'
+ *    is what delivers the FP exception pending in the loaded context,
+ *  - executes 'sqrtps' on XMM0 if EAX is negative (SSE_EXC),
+ *  - executes 'int $32+SSE_TEST_IRQ' if EAX is positive (IRQ_EXC).
+ */
+__asm__ (
+"do_raise: \n"
+" fwait \n"
+" test %eax, %eax \n"
+" je 2f \n"
+" jl 1f \n"
+INTRAISE(SSE_TEST_IRQ)
+" jmp 2f \n"
+"1: sqrtps %xmm0, %xmm0 \n"
+"2: \n"
+" ret \n"
+);
+
+/* Status bits returned by exc_raise(); bit #n selects fail_msgs[n] */
+#define SSE_TEST_HP_FAILED 1
+#define SSE_TEST_FSPR_FAILED 2
+#define SSE_TEST_CTXTCMP_FAILED 4
+
+static const char *fail_msgs[] = {
+  "Seems that HP task was not executing",
+  "FPSR 'Invalid-operation' flag should be clear",
+  "Restored context does NOT match the saved one",
+};
+
+/* Print one error line to stderr for every status bit set in 'st'.
+ *
+ * st:    bitmask of SSE_TEST_xxx_FAILED flags
+ * where: name of the test that produced the status
+ */
+static void prstat(int st, const char *where)
+{
+  const unsigned n_msgs = sizeof(fail_msgs)/sizeof(fail_msgs[0]);
+  unsigned       bit;
+
+  for ( bit = 0; bit < n_msgs; bit++ ) {
+    if ( 0 == (st & (1 << bit)) )
+      continue;
+    fprintf(stderr,"sse_test ERROR: %s (testing: %s)\n", fail_msgs[bit], where);
+  }
+}
+
+/* Set to a nonzero value to enable debug messages */
+int sse_test_debug = 0;
+
+/* Load a known FPU/SSE context, raise an exception or interrupt and verify
+ * that the context is still intact when execution resumes.
+ *
+ * kind: FP_EXC, SSE_EXC or IRQ_EXC; handed to do_raise in EAX.
+ *
+ * RETURNS: zero on success, otherwise a bitwise OR of
+ *   SSE_TEST_HP_FAILED      (RTEMS only) 'sse_test_check' still had this
+ *                           value after do_raise returned,
+ *   SSE_TEST_FSPR_FAILED    the 'invalid operation' flag was set in the
+ *                           FPU status word saved after do_raise returned,
+ *   SSE_TEST_CTXTCMP_FAILED the context saved after do_raise returned does
+ *                           not match the expected one (cmp_ctxt() reports
+ *                           the differences on stderr).
+ */
+static int
+exc_raise(int kind)
+{
+  Context_Control_sse nctxt;     /* context saved after do_raise returned   */
+  Context_Control_sse octxt;     /* known context loaded before do_raise    */
+  Context_Control_sse orig_ctxt; /* caller's context; saved and restored by
+                                  * the asm block below                     */
+  int      i, j, rval;
+  double   s2;
+  uint16_t fsw;
+  uint16_t fsw_was;
+  __vf     f4 = { -1., -2., -3., -4. };
+  __vf     tmp;
+  /* Sign bit of every lane; an unsigned constant is used because shifting
+   * 1 into the sign bit of an 'int' is undefined.
+   */
+  __v32    sgn = { 0x80000000u, 0x80000000u, 0x80000000u, 0x80000000u };
+
+  /* Start from the current state so that all members hold sane values */
+  stor_ctxt(&octxt);
+
+  /* Remember the status word for the debug message below; 'orig_ctxt' is
+   * only written by the 'fxsave' inside the asm block and must not be read
+   * before that.
+   */
+  fsw_was = octxt.fsw;
+
+  octxt.fsw   &= ~FPSR_ALLE;
+  octxt.mxcsr &= ~MXCSR_ALLE;
+
+  /* Give every FP register and every byte of every XMM register a
+   * distinct, known value.
+   */
+  for ( i=0; i<8; i++ ) {
+    fp_st(&octxt, i, (double)i+0.1);
+    for (j=0; j<16; j++) {
+      octxt.xmmregs[i][j]=(i<<4)+j;
+    }
+  }
+
+  if ( SSE_EXC == kind ) {
+    /* do_raise takes the square root of XMM0: load negative numbers and
+     * unmask the 'invalid operation' exception.
+     */
+    memcpy(octxt.xmmregs[0], &f4, sizeof(f4));
+    octxt.mxcsr &= ~MXCSR_IM;
+  }
+
+  /* set tags to 'valid' */
+  octxt.ftw = 0xff;
+
+  /* enable 'invalid arg' exception */
+  octxt.fcw &= ~ ( FPCW_IM );
+
+  if ( FP_EXC == kind ) {
+    /* mark an 'invalid operation' exception as pending */
+    octxt.fsw |= ( FPSR_IE | FPSR_ES );
+  }
+
+  if ( sse_test_debug ) {
+#ifdef __rtems__
+    printk("RAISE (fsw was 0x%04x)\n", fsw_was);
+#else
+    /* printk() is not available outside of RTEMS */
+    printf("RAISE (fsw was 0x%04x)\n", fsw_was);
+#endif
+  }
+
+  /* Everything between loading the known context and saving it again is
+   * done in assembly so that the compiler cannot touch the FP/SSE state:
+   *   save caller's context -> arm 'sse_test_check' (RTEMS) ->
+   *   load known context -> raise -> fetch 'sse_test_check' ->
+   *   save resulting context -> restore caller's context.
+   * The block writes 'sse_test_check' and the handlers run in between,
+   * hence the "memory" clobber; 'test' in do_raise modifies the flags.
+   */
+  asm volatile(
+    " fxsave %2 \n"
+#ifdef __rtems__
+    " movl %4, sse_test_check\n"
+#endif
+    " fxrstor %3 \n"
+    " call do_raise \n"
+#ifdef __rtems__
+    " movl sse_test_check, %1\n"
+#else
+    " movl $0, %1 \n"
+#endif
+#ifdef TEST_MISMATCH
+    " pxor %%xmm0, %%xmm0 \n"
+#endif
+    " fxsave %0 \n"
+    " fxrstor %2 \n"
+    : "=m"(nctxt),"=&r"(rval),"=m"(orig_ctxt)
+    : "m"(octxt), "i"(SSE_TEST_HP_FAILED),"a"(kind)
+    : "xmm0", "memory", "cc"
+  );
+
+  if ( ( FPSR_IE & nctxt.fsw ) ) {
+    rval |= SSE_TEST_FSPR_FAILED;
+  }
+
+  /* Adjust for the differences that are expected before comparing */
+  if ( FP_EXC == kind ) {
+    /* the flags that were set in the loaded context are expected to be
+     * clear now; put them back so that the comparison matches
+     */
+    nctxt.fsw |= (FPSR_IE | FPSR_ES);
+  } else if ( SSE_EXC == kind ) {
+    /* XMM0 is expected to hold the square roots of the absolute values
+     * (sign bits stripped) of the numbers that were loaded
+     */
+    tmp = __builtin_ia32_sqrtps( (__vf)(~sgn & (__v32)f4) );
+    /* sqrt raises PE; just clear it */
+    nctxt.mxcsr &= ~MXCSR_PE;
+    memcpy( octxt.xmmregs[0], &tmp, sizeof(tmp) );
+  }
+
+  if ( cmp_ctxt(&nctxt, &octxt, 0) ) {
+    rval |= SSE_TEST_CTXTCMP_FAILED;
+  }
+
+  /* Do some floating-point work in the restored (caller's) context */
+  s2 = sqrt(2.0);
+
+  asm volatile("fstsw %0":"=m"(fsw));
+
+  if ( sse_test_debug )
+    printf("sqrt(2): %f (FSTW: 0x%02"PRIx16")\n", s2, fsw);
+
+  return rval;
+}
+
+#ifdef __rtems__
+/* Exception / interrupt handler used by the test (defined below) */
+static void
+sse_test_ehdl(CPU_Exception_frame *p_f);
+
+/* Semaphore released by the handler and obtained by the HP task */
+rtems_id sse_test_sync = 0;
+/* Exception handler that was installed before the test took over */
+cpuExcHandlerType sse_test_ohdl = 0;
+
+/* NOTE(review): not referenced anywhere else in this file */
+CPU_Exception_frame *sse_test_frame = 0;
+/* Set to SSE_TEST_HP_FAILED by exc_raise() right before raising and
+ * cleared by the HP task after it was notified; exc_raise() reads it
+ * back as part of its status.
+ */
+volatile int sse_test_check = SSE_TEST_HP_FAILED;
+/* Bitmask of the tests (SSE_TEST_xxx_EXC) that are to be executed */
+unsigned sse_tests = 0;
+
+/* High-priority task of the test.
+ *
+ * arg: id of the synchronization semaphore.
+ *
+ * The task first verifies that it was started with the default FPU
+ * control word and MXCSR. Then, once for every enabled test, it blocks on
+ * the semaphore (which lets the low-priority task run), clobbers the
+ * FPU/SSE registers when it wakes up and clears 'sse_test_check' to show
+ * that it was executing. Finally it suspends itself.
+ */
+rtems_task
+sse_test_hp_task(rtems_task_argument arg)
+{
+  static const char *const names[] = {"FPU_EXC", "SSE_EXC", "IRQ_EXC"};
+  const unsigned    n_names = sizeof(names)/sizeof(names[0]);
+  rtems_id          sync    = (rtems_id)arg;
+  rtems_status_code status;
+  uint16_t          cw;
+  uint32_t          csr;
+  unsigned          k;
+
+  /* verify that FPU control word is default value */
+  asm volatile("fstcw %0":"=m"(cw));
+  if ( cw != _CPU_Null_fp_context.fpucw ) {
+    fprintf(
+      stderr,
+      "ERROR: FPU CW initialization mismatch: got 0x%04"PRIx16"; expected 0x%04"PRIx16"\n",
+      cw,
+      _CPU_Null_fp_context.fpucw
+    );
+  }
+
+  /* check MXCSR default value */
+  asm volatile("stmxcsr %0":"=m"(csr));
+  if ( csr != _CPU_Null_fp_context.mxcsr ) {
+    fprintf(
+      stderr,
+      "ERROR: MXCSR initialization mismatch: got 0x%08"PRIx32"; expected 0x%08"PRIx32"\n",
+      csr,
+      _CPU_Null_fp_context.mxcsr
+    );
+  }
+
+  for ( k = 0; k < n_names; k++ ) {
+    /* skip tests that are not enabled */
+    if ( 0 == ( sse_tests & (1u<<k) ) )
+      continue;
+
+    if ( sse_test_debug )
+      printk("HP task will now block for %s\n",names[k]);
+
+    /* Blocking here lets the low-priority task continue */
+    status = rtems_semaphore_obtain(sync, RTEMS_WAIT, 500);
+
+    all_clobber(0xaffeaffe, 0xcafecafe);
+
+    if ( RTEMS_SUCCESSFUL != status ) {
+      rtems_error(status,"ERROR: sse_test hp task wasn't notified of exception\n");
+      break;
+    }
+
+    /* set flag indicating that we executed until here */
+    sse_test_check = 0;
+  }
+
+  rtems_task_suspend(RTEMS_SELF);
+}
+
+/* Flags to skip individual tests; in the task argument they are shifted
+ * left by one bit because bit 0 is SSE_TEST_NO_DEL.
+ */
+#define SSE_TEST_FPU_EXC (1<<0)
+#define SSE_TEST_SSE_EXC (1<<1)
+#define SSE_TEST_IRQ_EXC (1<<2)
+
+#define SSE_TEST_ALL 7
+
+/* If this flag is given the executing task is not deleted
+ * when the test finishes. This is useful if you want to
+ * execute from a shell or similar.
+ */
+#define SSE_TEST_NO_DEL (1<<0)
+
+/* Verify that the FPU control word and MXCSR (exception flags ignored)
+ * still hold the values the LP task loaded before the tests.
+ *
+ * after:     name of the test that was just run (used in the messages)
+ * fp_cw_set: expected FPU control word
+ * mxcsr_set: expected MXCSR
+ *
+ * RETURNS: number of mismatches found (0..2); each one is reported on
+ *          stderr.
+ */
+static int
+sse_test_ctrl_errs(const char *after, uint16_t fp_cw_set, uint32_t mxcsr_set)
+{
+  uint16_t fp_cw;
+  uint32_t mxcsr;
+  int      errs = 0;
+
+  asm volatile("fstcw %0":"=m"(fp_cw));
+  asm volatile("stmxcsr %0":"=m"(mxcsr));
+  mxcsr &= ~(MXCSR_ALLE);
+  if ( fp_cw != fp_cw_set ) {
+    fprintf(stderr,"sse_test ERROR: FPCW mismatch (after %s): expected 0x%04"PRIx16", got 0x%04"PRIx16"\n", after, fp_cw_set, fp_cw);
+    errs++;
+  }
+  if ( mxcsr != mxcsr_set ) {
+    fprintf(stderr,"sse_test ERROR: MXCSR mismatch (after %s): expected 0x%08"PRIx32", got 0x%08"PRIx32"\n", after, mxcsr_set, mxcsr);
+    errs++;
+  }
+  return errs;
+}
+
+/* Low-priority task driving the test.
+ *
+ * Task arg is bitmask of these flags: bit 0 is SSE_TEST_NO_DEL, the bits
+ * above it select the tests to skip.
+ *
+ * The task loads non-default rounding modes into the FPU control word and
+ * MXCSR, creates the synchronization semaphore and the high-priority
+ * task, runs the enabled tests and prints a summary to stderr. All
+ * resources are released again before the task finishes.
+ */
+rtems_task
+sse_test_lp_task(rtems_task_argument arg)
+{
+  rtems_id               hp_task = 0;
+  rtems_status_code      sc;
+  rtems_task_priority    pri;
+  uint16_t               fp_cw_set;
+  uint32_t               mxcsr_set;
+  rtems_irq_connect_data irqd;
+  int                    flags = (int)arg;
+  int                    st;
+  int                    errs = 0;
+
+  sse_tests = SSE_TEST_ALL & ~(flags>>1);
+
+  sse_test_ohdl = 0;
+
+  /* Modify the control registers so that it can be verified later that
+   * this task's (non-default) settings survive the exceptions.
+   */
+  fp_cw_set = _CPU_Null_fp_context.fpucw | FPCW_RC(3) ;
+  mxcsr_set = _CPU_Null_fp_context.mxcsr | MXCSR_RC(3) ;
+  asm volatile("ldmxcsr %0"::"m"(mxcsr_set));
+  asm volatile("fldcw %0"::"m"(fp_cw_set));
+
+  sc = rtems_semaphore_create(
+    rtems_build_name('s','s','e','S'),
+    0,
+    RTEMS_SIMPLE_BINARY_SEMAPHORE,
+    0,
+    &sse_test_sync
+  );
+  if ( RTEMS_SUCCESSFUL != sc ) {
+    rtems_error(sc, "sse_test ERROR: creation of 'sync' semaphore failed");
+    errs++;
+    goto bail;
+  }
+
+  /* Query our own priority; the HP task is created relative to it */
+  sc = rtems_task_set_priority( RTEMS_SELF, RTEMS_CURRENT_PRIORITY, &pri );
+  if ( RTEMS_SUCCESSFUL != sc ) {
+    rtems_error(sc, "sse_test ERROR: unable to query task priority");
+    errs++;
+    goto bail;
+  }
+
+  sc = rtems_task_create(
+    rtems_build_name('s','s','e','H'),
+    pri - 2,
+    20000,
+    RTEMS_DEFAULT_MODES,
+    RTEMS_FLOATING_POINT,
+    &hp_task
+  );
+  if ( RTEMS_SUCCESSFUL != sc ) {
+    hp_task = 0;
+    rtems_error( sc, "sse_test ERROR: creation of high-priority task failed");
+    errs++;
+    goto bail;
+  }
+
+  sc = rtems_task_start( hp_task, sse_test_hp_task, (rtems_task_argument)sse_test_sync );
+  if ( RTEMS_SUCCESSFUL != sc ) {
+    rtems_error( sc, "sse_test ERROR: start of high-priority task failed");
+    /* count the failure; otherwise the summary would say 'PASSED' */
+    errs++;
+    goto bail;
+  }
+
+  /* Test if FP/SSE context is saved/restored across an exception */
+  sse_test_ohdl      = _currentExcHandler;
+  _currentExcHandler = sse_test_ehdl;
+
+  if ( (sse_tests & SSE_TEST_FPU_EXC) ) {
+    if ( (st = exc_raise(FP_EXC)) ) {
+      prstat(st,"FP_EXC");
+      errs++;
+    }
+
+    /* Test modified FPCW/MXCSR */
+    errs += sse_test_ctrl_errs("FP_EXC", fp_cw_set, mxcsr_set);
+  }
+
+  if ( (sse_tests & SSE_TEST_SSE_EXC) ) {
+    if ( (st = exc_raise(SSE_EXC)) ) {
+      prstat(st, "SSE_EXC");
+      errs++;
+    }
+
+    /* Test modified FPCW/MXCSR */
+    errs += sse_test_ctrl_errs("SSE_EXC", fp_cw_set, mxcsr_set);
+  }
+
+  if ( (sse_tests & SSE_TEST_IRQ_EXC) ) {
+    memset( &irqd, 0, sizeof(irqd) );
+    irqd.name   = SSE_TEST_IRQ;
+    irqd.hdl    = (void*)sse_test_ehdl;
+    irqd.handle = 0;
+
+    if ( ! BSP_install_rtems_irq_handler( &irqd ) ) {
+      fprintf(stderr, "sse_test ERROR: Unable to install ISR\n");
+      errs++;
+      goto bail;
+    }
+
+    /* Test if FP/SSE context is saved/restored across an interrupt */
+    if ( (st = exc_raise(IRQ_EXC)) ) {
+      prstat(st, "IRQ");
+      errs++;
+    }
+
+    if ( ! BSP_remove_rtems_irq_handler( &irqd ) ) {
+      fprintf(stderr, "sse_test ERROR: Unable to uninstall ISR\n");
+    }
+
+    /* Test modified FPCW/MXCSR */
+    errs += sse_test_ctrl_errs("IRQ", fp_cw_set, mxcsr_set);
+  }
+
+bail:
+  /* Wait for console to calm down... */
+  rtems_task_wake_after(5);
+  fprintf(stderr,"SSE/FPU Test %s (%d errors)\n", errs ? "FAILED":"PASSED", errs);
+
+  /* Undo everything that was set up above */
+  if ( sse_test_ohdl ) {
+    _currentExcHandler = sse_test_ohdl;
+    sse_test_ohdl      = 0;
+  }
+  if ( sse_test_sync )
+    rtems_semaphore_delete( sse_test_sync );
+  sse_test_sync = 0;
+  if ( hp_task )
+    rtems_task_delete( hp_task );
+
+  if ( ! (flags & SSE_TEST_NO_DEL) )
+    rtems_task_delete( RTEMS_SELF );
+}
+
+/* Exception and interrupt handler of the test.
+ *
+ * p_f: exception frame when invoked as the exception handler. When the
+ *      routine runs as the ISR installed by the LP task the argument is
+ *      the (zero) 'handle' of the IRQ connect data, i.e., NULL.
+ *
+ * For an FP exception (vector 16) the saved FPU status word is reset; for
+ * an SSE exception (vector 19) the operand in the saved XMM0 is negated
+ * and the saved MXCSR exception flags are cleared. The saved XMM registers
+ * are checked against the pattern loaded by exc_raise(). Any other
+ * exception, or a frame without FP/SSE context, is passed on to the
+ * previously installed handler and nothing else is done.
+ *
+ * Finally the live FPU/SSE registers are clobbered and the HP task is
+ * notified through the synchronization semaphore.
+ */
+static void
+sse_test_ehdl(CPU_Exception_frame *p_f)
+{
+  int  i,j,start = 0;
+  int  mismatch;
+  __vf f4;
+
+  if ( p_f ) {
+    printk("Got exception #%u\n", p_f->idtIndex);
+    printk("EIP: 0x%08x, ESP: 0x%08x\n", p_f->eip, p_f->esp0);
+    printk("TID: 0x%08x\n", _Thread_Executing->Object.id);
+
+    if ( ! p_f->fp_ctxt ) {
+      printk("ERROR: NO FP/SSE CONTEXT ATTACHED ??\n");
+      sse_test_ohdl(p_f);
+      /* there is no context to inspect or fix up; do not fall through
+       * and dereference the NULL pointer should the handler return
+       */
+      return;
+    }
+    if ( 16 == p_f->idtIndex ) {
+      printk("Resetting FP status (0x%04"PRIx16")\n", p_f->fp_ctxt->fsw);
+      p_f->fp_ctxt->fsw = 0;
+    } else if ( 19 == p_f->idtIndex ) {
+      /* XMM0 holds the (negative) operand, not the test pattern: exclude
+       * it from the check below and make the operand positive.
+       */
+      start = 1;
+      memcpy(&f4, p_f->fp_ctxt->xmmregs[0], sizeof(f4));
+      f4 = -f4;
+      memcpy(p_f->fp_ctxt->xmmregs[0], &f4, sizeof(f4));
+      p_f->fp_ctxt->mxcsr &= ~MXCSR_ALLE;
+    } else {
+      printk("(skipping non-FP exception)\n");
+      sse_test_ohdl(p_f);
+      /* not raised by the test: neither check the registers nor notify
+       * the HP task
+       */
+      return;
+    }
+
+    printk("Checking XMM regs -- ");
+    for ( mismatch=0, i=start; i<8; i++ ) {
+      for ( j=0; j<16; j++ ) {
+        if ( p_f->fp_ctxt->xmmregs[i][j] != ((i<<4) | j) )
+          mismatch++;
+      }
+    }
+    if ( mismatch ) {
+      printk("%u mismatches; dump:\n", mismatch);
+      for ( i=0; i<8; i++ ) {
+        for ( j=0; j<16; j++ ) {
+          printk("0x%02x ", p_f->fp_ctxt->xmmregs[i][j]);
+        }
+        printk("\n");
+      }
+    } else {
+      printk("OK\n");
+    }
+  } else {
+    printk("IRQ %u\n", SSE_TEST_IRQ);
+  }
+
+  /* If the context is properly saved/restored around the exception then
+   * the interrupted task must not notice any of this.
+   */
+  printk("Clobbering FPU/SSE state\n");
+  asm volatile("finit");
+  sse_clobber(0xdeadbeef);
+  printk("Notifying task\n");
+  rtems_semaphore_release( sse_test_sync );
+}
+
+#else
+
+/* Code using signals for testing under linux; unfortunately, 32-bit
+ * linux seems to pass no SSE context info to the sigaction...
+ */
+
+#include <signal.h>
+#include <ucontext.h>
+
+/* Expands to a 'case' that sets 'msg' to the name of the FPE_xxx code */
+#define MKCASE(X) case FPE_##X: msg="FPE_"#X; break;
+
+/* Zero XMM register #i */
+#define CLRXMM(i) __asm__ volatile("pxor %%xmm"#i", %%xmm"#i:::"xmm"#i)
+
+/* SIGFPE action.
+ *
+ * signum: signal number; anything but SIGFPE is rejected with a warning
+ * p_info: signal details; 'si_code' and 'si_addr' are reported
+ * arg3:   the ucontext_t of the interrupted code
+ *
+ * Reports the exception on stderr, resets the FP status word stored in
+ * the interrupted context (linux only) and finally clobbers the live
+ * FPU/SSE registers.
+ */
+static void
+fpe_act(int signum, siginfo_t *p_info, void *arg3)
+{
+  ucontext_t *p_ctxt = arg3;
+  const char *msg    = "FPE_UNKNOWN";
+
+  if ( SIGFPE != signum ) {
+    fprintf(stderr,"WARNING: fpe_act handles SIGFPE\n");
+    return;
+  }
+
+  /* translate the code into a name; unknown codes are not handled */
+  switch ( p_info->si_code ) {
+    MKCASE(INTDIV)
+    MKCASE(INTOVF)
+    MKCASE(FLTDIV)
+    MKCASE(FLTOVF)
+    MKCASE(FLTUND)
+    MKCASE(FLTRES)
+    MKCASE(FLTINV)
+    MKCASE(FLTSUB)
+    default:
+      fprintf(stderr,"WARNING: fpe_act got unkown code %u\n", p_info->si_code);
+      return;
+  }
+
+  fprintf(stderr,"Got SIGFPE (%s) @%p\n", msg, p_info->si_addr);
+#ifdef __linux__
+  fprintf(stderr,"Resetting FP status 0x%02lx\n", p_ctxt->uc_mcontext.fpregs->sw);
+  p_ctxt->uc_mcontext.fpregs->sw = 0;
+#ifdef TEST_MISMATCH
+  fp_st1((void*)&p_ctxt->uc_mcontext.fpregs->_st[3],2.345);
+#endif
+#endif
+
+  /* Clear FPU; if context is properly saved/restored around exception
+   * then this shouldn't disturb the register contents of the interrupted
+   * task/process.
+   */
+  asm volatile("finit");
+  sse_clobber(0xdeadbeef);
+}
+
+/* Save the current FPU/SSE context and print the FPU control word, the
+ * FPU status word and the tag byte found in it to stdout.
+ */
+static void
+test(void)
+{
+  Context_Control_sse snapshot;
+
+  stor_ctxt(&snapshot);
+
+  printf("FPCW: 0x%"PRIx16"\nFPSW: 0x%"PRIx16"\n", snapshot.fcw, snapshot.fsw);
+  printf("FTAG: 0x%"PRIx8"\n",snapshot.ftw);
+}
+
+/* Stand-alone (non-RTEMS) test driver: installs fpe_act() as the SIGFPE
+ * action, prints MXCSR and the current FPU state and then runs the FP
+ * exception test.
+ *
+ * RETURNS: 0 if the test passed; 1 if the signal action could not be
+ *          installed or if exc_raise() reported a failure (the reasons
+ *          are printed to stderr).
+ */
+int
+main(int argc, char **argv)
+{
+  struct sigaction act;
+  uint32_t         mxcsr;
+  int              st;
+
+  (void)argc;
+  (void)argv;
+
+  memset(&act, 0, sizeof(act));
+
+  act.sa_sigaction = fpe_act;
+  act.sa_flags     = SA_SIGINFO;
+
+  /* the previous action is of no interest */
+  if ( sigaction(SIGFPE, &act, NULL) ) {
+    perror("sigaction");
+    return 1;
+  }
+
+  asm volatile("stmxcsr %0":"=m"(mxcsr));
+  printf("MXCSR: 0x%08"PRIx32"\n", mxcsr);
+
+  test();
+
+  /* do not ignore the outcome: let the exit status reflect it */
+  st = exc_raise(FP_EXC);
+  if ( st ) {
+    prstat(st, "FP_EXC");
+    return 1;
+  }
+  return 0;
+}
+#endif
+
+/* Helpers to access CR4 and MXCSR */
+
+/* Read control register CR4.
+ * NOTE: moves from/to control registers are privileged instructions.
+ *
+ * RETURNS: current contents of CR4.
+ */
+uint32_t
+mfcr4(void)
+{
+  uint32_t rval;
+
+  asm volatile("mov %%cr4, %0":"=r"(rval));
+  return rval;
+}
+
+/* Write 'rval' to control register CR4 (privileged, see mfcr4()) */
+void
+mtcr4(uint32_t rval)
+{
+  asm volatile("mov %0, %%cr4"::"r"(rval));
+}
+
+/* Read the SSE control/status register.
+ *
+ * RETURNS: current contents of MXCSR (see the MXCSR_xxx bit definitions).
+ */
+uint32_t
+mfmxcsr(void)
+{
+  uint32_t rval;
+
+  asm volatile("stmxcsr %0":"=m"(rval));
+  return rval;
+}
+
+/* Load 'rval' into the SSE control/status register */
+void
+mtmxcsr(uint32_t rval)
+{
+  asm volatile("ldmxcsr %0"::"m"(rval));
+}
+
+
+/* Provoke an SSE 'invalid operation' condition by taking the square root
+ * of a vector of four negative numbers (-2). Whether this results in an
+ * exception depends on the 'invalid operation' mask bit in MXCSR.
+ *
+ * RETURNS: the lowest lane of the result vector.
+ */
+float
+sseraise(void)
+{
+  /* four separate elements; with a comma missing, '-2. -2.' would be a
+   * single element of value -4 and the last lane would be zero
+   */
+  __vf  f4 = { -2., -2., -2., -2. };
+  float f;
+
+  f4 = __builtin_ia32_sqrtps( f4 );
+  memcpy(&f,&f4,sizeof(f));
+  return f;
+}