From 97005786d89fd7a57b5fe82f713cea739916f3da Mon Sep 17 00:00:00 2001 From: Joel Sherrill Date: Mon, 30 Oct 1995 21:54:45 +0000 Subject: SPARC port passes all tests --- cpukit/score/cpu/sparc/README | 118 ++++++++++++ cpukit/score/cpu/sparc/asm.h | 16 +- cpukit/score/cpu/sparc/cpu.c | 370 ++++++++++++++++++++++++++++--------- cpukit/score/cpu/sparc/rtems/asm.h | 16 +- cpukit/score/cpu/unix/cpu.c | 52 +----- 5 files changed, 435 insertions(+), 137 deletions(-) create mode 100644 cpukit/score/cpu/sparc/README (limited to 'cpukit/score/cpu') diff --git a/cpukit/score/cpu/sparc/README b/cpukit/score/cpu/sparc/README new file mode 100644 index 0000000000..0c481d67c1 --- /dev/null +++ b/cpukit/score/cpu/sparc/README @@ -0,0 +1,118 @@ +# +# $Id$ +# + +This file discusses SPARC specific issues which are important to +this port. The primary topics in this file are: + + + Global Register Usage + + Stack Frame + + EF bit in the PSR + + +Global Register Usage +===================== + +This information on register usage is based heavily on a comment in the +file gcc-2.7.0/config/sparc/sparc.h in the gcc 2.7.0 source. + + + g0 is hardwired to 0 + + On non-v9 systems: + - g1 is free to use as temporary. + - g2-g4 are reserved for applications. Gcc normally uses them as + temporaries, but this can be disabled via the -mno-app-regs option. + - g5 through g7 are reserved for the operating system. + + On v9 systems: + - g1 and g5 are free to use as temporaries. + - g2-g4 are reserved for applications (the compiler will not normally use + them, but they can be used as temporaries with -mapp-regs). + - g6-g7 are reserved for the operating system. + + NOTE: As of gcc 2.7.0 register g1 was used in the following scenarios: + + + as a temporary by the 64 bit sethi pattern + + when restoring call-preserved registers in large stack frames + +RTEMS places no constraints on the usage of the global registers. 
Although +gcc assumes that either g5-g7 (non-V9) or g6-g7 (V9) are reserved for the +operating system, RTEMS does not assume any special use for them. + + + +Stack Frame +=========== + +The stack grows downward (i.e. to lower addresses) on the SPARC architecture. + +The following is the organization of the stack frame: + + + + | ............... | + fp | | + +-------------------------------+ + | | + | Local registers, temporaries, | + | and saved floats | x bytes + | | + sp + x +-------------------------------+ + | | + | outgoing parameters past | + | the sixth one | x bytes + | | + sp + 92 +-------------------------------+ * + | | * + | area for callee to save | * + | register arguments | * 24 bytes + | | * + sp + 68 +-------------------------------+ * + | | * + | structure return pointer | * 4 bytes + | | * + sp + 64 +-------------------------------+ * + | | * + | local register set | * 32 bytes + | | * + sp + 32 +-------------------------------+ * + | | * + | input register set | * 32 bytes + | | * + sp +-------------------------------+ * + + +* = minimal stack frame + +x = optional components + +EF bit in the PSR +================= + +The EF (enable floating point unit) in the PSR is utilized in this port to +prevent non-floating point tasks from performing floating point +operations. This bit is maintained as part of the integer context. +However, the floating point context is switched BEFORE the integer +context. Thus the EF bit in place at the time of the FP switch may +indicate that FP operations are disabled. This occurs on certain task +switches, when the EF bit will be 0 for the outgoing task and thus a fault +will be generated on the first FP operation of the FP context save. + +The remedy for this is to enable FP access as the first step in both the +save and restore of the FP context area. This bit will be subsequently +reloaded by the integer context switch. + +Two of the scenarios which demonstrate this problem are outlined below: + +1. 
When the first FP task is switched to. The system tasks are not FP and +thus would be unable to restore the FP context of the incoming task. + +2. On a deferred FP context switch. In this case, the system might switch +from FP Task A to non-FP Task B and then to FP Task C. In this scenario, +the floating point state must technically be saved by a non-FP task. + + + + + + + + diff --git a/cpukit/score/cpu/sparc/asm.h b/cpukit/score/cpu/sparc/asm.h index 10157171c5..a3d62416b8 100644 --- a/cpukit/score/cpu/sparc/asm.h +++ b/cpukit/score/cpu/sparc/asm.h @@ -28,7 +28,9 @@ */ #define ASM + #include +#include /* * Recent versions of GNU cpp define variables which indicate the @@ -37,7 +39,9 @@ * have to define these as appropriate. */ -/* XXX This does not appear to work on gcc 2.7.0 on the sparc */ +/* XXX __USER_LABEL_PREFIX__ and __REGISTER_PREFIX__ do not work on gcc 2.7.0 */ +/* XXX The following ifdef magic fixes the problem but results in a warning */ +/* XXX when compiling assembly code. */ #undef __USER_LABEL_PREFIX__ #ifndef __USER_LABEL_PREFIX__ #define __USER_LABEL_PREFIX__ _ @@ -91,6 +95,16 @@ #define PUBLIC(sym) .globl SYM (sym) #define EXTERN(sym) .globl SYM (sym) +/* + * Entry for traps which jump to a programmer-specified trap handler. + */ + +#define TRAP(_vector, _handler) \ + mov %psr, %l0 ; \ + sethi %hi(_handler), %l4 ; \ + jmp %l4+%lo(_handler); \ + mov _vector, %l3 + #endif /* end of include file */ diff --git a/cpukit/score/cpu/sparc/cpu.c b/cpukit/score/cpu/sparc/cpu.c index cf70913d5e..23d998cab5 100644 --- a/cpukit/score/cpu/sparc/cpu.c +++ b/cpukit/score/cpu/sparc/cpu.c @@ -7,52 +7,115 @@ #include #include -/* _CPU_Initialize +#if defined(erc32) +#include +#endif + +/* + * This initializes the set of opcodes placed in each trap + * table entry. The routine which installs a handler is responsible + * for filling in the fields for the _handler address and the _vector + * trap type. 
+ * + * The constants following this structure are masks for the fields which + * must be filled in when the handler is installed. + */ + +const CPU_Trap_table_entry _CPU_Trap_slot_template = { + 0xa1480000, /* mov %psr, %l0 */ + 0x29000000, /* sethi %hi(_handler), %l4 */ + 0x81c52000, /* jmp %l4 + %lo(_handler) */ + 0xa6102000 /* mov _vector, %l3 */ +}; + +/*PAGE + * + * _CPU_Initialize * * This routine performs processor dependent initialization. * - * INPUT PARAMETERS: + * Input Parameters: * cpu_table - CPU table to initialize * thread_dispatch - address of disptaching routine + * + * Output Parameters: NONE + * + * NOTE: There is no need to save the pointer to the thread dispatch routine. + * The SPARC's assembly code can reference it directly with no problems. */ - void _CPU_Initialize( rtems_cpu_table *cpu_table, - void (*thread_dispatch) /* ignored on this CPU */ + void (*thread_dispatch) /* ignored on this CPU */ ) { - void *pointer; + void *pointer; + unsigned32 trap_table_start; + unsigned32 tbr_value; + CPU_Trap_table_entry *old_tbr; + CPU_Trap_table_entry *trap_table; /* - * The thread_dispatch argument is the address of the entry point - * for the routine called at the end of an ISR once it has been - * decided a context switch is necessary. On some compilation - * systems it is difficult to call a high-level language routine - * from assembly. This allows us to trick these systems. - * - * If you encounter this problem save the entry point in a CPU - * dependent variable. + * Install the executive's trap table. All entries from the original + * trap table are copied into the executive's trap table. This is essential + * since this preserves critical trap handlers such as the window underflow + * and overflow handlers. It is the responsibility of the BSP to + * install these in the initial trap table. 
+ */ + + trap_table_start = (unsigned32) &_CPU_Trap_Table_area; + if (trap_table_start & (SPARC_TRAP_TABLE_ALIGNMENT-1)) + trap_table_start = (trap_table_start + SPARC_TRAP_TABLE_ALIGNMENT) & + ~(SPARC_TRAP_TABLE_ALIGNMENT-1); + + trap_table = (CPU_Trap_table_entry *) trap_table_start; + + sparc_get_tbr( tbr_value ); - _CPU_Thread_dispatch_pointer = thread_dispatch; + old_tbr = (CPU_Trap_table_entry *) (tbr_value & 0xfffff000); + + memcpy( trap_table, (void *) old_tbr, 256 * sizeof( CPU_Trap_table_entry ) ); + + sparc_set_tbr( trap_table_start ); /* - * If there is not an easy way to initialize the FP context - * during Context_Initialize, then it is usually easier to - * save an "uninitialized" FP context here and copy it to - * the task's during Context_Initialize. + * This seems to be the most appropriate way to obtain an initial + * FP context on the SPARC. The NULL fp context is copied to + * the task's FP context during Context_Initialize. */ pointer = &_CPU_Null_fp_context; _CPU_Context_save_fp( &pointer ); + /* + * Grab our own copy of the user's CPU table. + */ + _CPU_Table = *cpu_table; + +#if defined(erc32) + + /* + * ERC32 specific initialization + */ + + _ERC32_MEC_Timer_Control_Mirror = 0; + ERC32_MEC.Timer_Control = 0; + + ERC32_MEC.Control |= ERC32_CONFIGURATION_POWER_DOWN_ALLOWED; + +#endif + } /*PAGE * * _CPU_ISR_Get_level + * + * Input Parameters: NONE + * + * Output Parameters: + * returns the current interrupt level (PIL field of the PSR) */ unsigned32 _CPU_ISR_Get_level( void ) @@ -64,134 +127,263 @@ unsigned32 _CPU_ISR_Get_level( void ) return level; } -/* _CPU_ISR_install_vector +/*PAGE + * + * _CPU_ISR_install_raw_handler + * + * This routine installs the specified handler as a "raw" non-executive + * supported trap handler (a.k.a. interrupt service routine). + * + * Input Parameters: + * vector - trap table entry number plus synchronous + * vs. 
asynchronous information + * new_handler - address of the handler to be installed + * old_handler - pointer to an address of the handler previously installed + * + * Output Parameters: + * *old_handler - address of the handler previously installed + * + * NOTE: + * + * On the SPARC, there are really only 256 vectors. However, the executive + * has no easy, fast, reliable way to determine which traps are synchronous + * and which are asynchronous. By default, synchronous traps return to the + * instruction which caused the interrupt. So if you install a software + * trap handler as an executive interrupt handler (which is desirable since + * RTEMS takes care of window and register issues), then the executive needs + * to know that the return address is to the trap rather than the instruction + * following the trap. + * + * So vectors 0 through 255 are treated as regular asynchronous traps which + * provide the "correct" return address. Vectors 256 through 511 are assumed + * by the executive to be synchronous and to require that the return address + * be fudged. + * + * If you use this mechanism to install a trap handler which must reexecute + * the instruction which caused the trap, then it should be installed as + * an asynchronous trap. This will avoid the executive changing the return + * address. + */ + +void _CPU_ISR_install_raw_handler( + unsigned32 vector, + proc_ptr new_handler, + proc_ptr *old_handler +) +{ + unsigned32 real_vector; + CPU_Trap_table_entry *tbr; + CPU_Trap_table_entry *slot; + unsigned32 u32_tbr; + unsigned32 u32_handler; + + /* + * Get the "real" trap number for this vector ignoring the synchronous + * versus asynchronous indicator included with our vector numbers. + */ + + real_vector = SPARC_REAL_TRAP_NUMBER( vector ); + + /* + * Get the current base address of the trap table and calculate a pointer + * to the slot we are interested in. 
+ */ + + sparc_get_tbr( u32_tbr ); + + u32_tbr &= 0xfffff000; + + tbr = (CPU_Trap_table_entry *) u32_tbr; + + slot = &tbr[ real_vector ]; + + /* + * Get the address of the old_handler from the trap table. + * + * NOTE: The old_handler returned will be bogus if it does not follow + * the RTEMS model. + */ + +#define HIGH_BITS_MASK 0xFFFFFC00 +#define HIGH_BITS_SHIFT 10 +#define LOW_BITS_MASK 0x000003FF + + if ( slot->mov_psr_l0 == _CPU_Trap_slot_template.mov_psr_l0 ) { + u32_handler = + ((slot->sethi_of_handler_to_l4 & HIGH_BITS_MASK) << HIGH_BITS_SHIFT) | + (slot->jmp_to_low_of_handler_plus_l4 & LOW_BITS_MASK); + *old_handler = (proc_ptr) u32_handler; + } else + *old_handler = 0; + + /* + * Copy the template to the slot and then fix it. + */ + + *slot = _CPU_Trap_slot_template; + + u32_handler = (unsigned32) new_handler; + + slot->mov_vector_l3 |= vector; + slot->sethi_of_handler_to_l4 |= + (u32_handler & HIGH_BITS_MASK) >> HIGH_BITS_SHIFT; + slot->jmp_to_low_of_handler_plus_l4 |= (u32_handler & LOW_BITS_MASK); +} + +/*PAGE + * + * _CPU_ISR_install_vector * * This kernel routine installs the RTEMS handler for the * specified vector. * * Input parameters: - * vector - interrupt vector number - * old_handler - former ISR for this vector number - * new_handler - replacement ISR for this vector number + * vector - interrupt vector number + * new_handler - replacement ISR for this vector number + * old_handler - pointer to former ISR for this vector number * - * Output parameters: NONE + * Output parameters: + * *old_handler - former ISR for this vector number * */ - void _CPU_ISR_install_vector( unsigned32 vector, proc_ptr new_handler, proc_ptr *old_handler ) { - *old_handler = _ISR_Vector_table[ vector ]; + unsigned32 real_vector; + proc_ptr ignored; + + /* + * Get the "real" trap number for this vector ignoring the synchronous + * versus asynchronous indicator included with our vector numbers. 
+ */ + + real_vector = SPARC_REAL_TRAP_NUMBER( vector ); /* - * If the interrupt vector table is a table of pointer to isr entry - * points, then we need to install the appropriate RTEMS interrupt - * handler for this vector number. + * Return the previous ISR handler. */ + *old_handler = _ISR_Vector_table[ real_vector ]; + /* - * We put the actual user ISR address in '_ISR_vector_table'. This will - * be used by the _ISR_Handler so the user gets control. + * Install the wrapper so this ISR can be invoked properly. */ - _ISR_Vector_table[ vector ] = new_handler; -} + _CPU_ISR_install_raw_handler( vector, _ISR_Handler, &ignored ); -/*PAGE - * - * _CPU_Install_interrupt_stack - */ + /* + * We put the actual user ISR address in '_ISR_vector_table'. This will + * be used by the _ISR_Handler so the user gets control. + */ -void _CPU_Install_interrupt_stack( void ) -{ + _ISR_Vector_table[ real_vector ] = new_handler; } /*PAGE * * _CPU_Context_Initialize + * + * This kernel routine initializes the basic non-FP context area associated + * with each thread. + * + * Input parameters: + * the_context - pointer to the context area + * stack_base - address of memory for the SPARC + * size - size in bytes of the stack area + * new_level - interrupt level for this context area + * entry_point - the starting execution point for this context + * is_fp - TRUE if this context is associated with an FP thread + * + * Output parameters: NONE */ -/* - * The following constants assist in building a thread's initial context. 
- */ - -#define CPU_FRAME_SIZE (112) /* based on disassembled test code */ -#define ADDR_ADJ_OFFSET -8 - void _CPU_Context_Initialize( - Context_Control *_the_context, - unsigned32 *_stack_base, - unsigned32 _size, - unsigned32 _new_level, - void *_entry_point + Context_Control *the_context, + unsigned32 *stack_base, + unsigned32 size, + unsigned32 new_level, + void *entry_point, + boolean is_fp ) { - unsigned32 jmp_addr; - unsigned32 _stack_high; /* highest "stack aligned" address */ - unsigned32 _the_size; + unsigned32 stack_high; /* highest "stack aligned" address */ + unsigned32 the_size; unsigned32 tmp_psr; - jmp_addr = (unsigned32) _entry_point; - /* * On CPUs with stacks which grow down (i.e. SPARC), we build the stack - * based on the _stack_high address. + * based on the stack_high address. */ - _stack_high = ((unsigned32)(_stack_base) + _size); - _stack_high &= ~(CPU_STACK_ALIGNMENT - 1); + stack_high = ((unsigned32)(stack_base) + size); + stack_high &= ~(CPU_STACK_ALIGNMENT - 1); - _the_size = _size & ~(CPU_STACK_ALIGNMENT - 1); + the_size = size & ~(CPU_STACK_ALIGNMENT - 1); -/* XXX following code is based on unix port */ /* - * XXX SPARC port needs a diagram like this one... - * See /usr/include/sys/stack.h in Solaris 2.3 for a nice - * diagram of the stack. + * See the README in this directory for a diagram of the stack. */ - _the_context->o7 = jmp_addr + ADDR_ADJ_OFFSET; - _the_context->o6 = (unsigned32)(_stack_high - CPU_FRAME_SIZE); - _the_context->i6 = (unsigned32)(_stack_high); -#if 0 - _the_context->rp = jmp_addr + ADDR_ADJ_OFFSET; - _the_context->sp = (unsigned32)(_stack_high - CPU_FRAME_SIZE); - _the_context->fp = (unsigned32)(_stack_high); -#endif + the_context->o7 = ((unsigned32) entry_point) - 8; + the_context->o6_sp = stack_high - CPU_MINIMUM_STACK_FRAME_SIZE; + the_context->i6_fp = stack_high; - _the_context->wim = 0x01; + /* + * Build the PSR for the task. Most everything can be 0 and the + * CWP is corrected during the context switch. 
+ * + * The EF bit determines if the floating point unit is available. + * The FPU is ONLY enabled if the context is associated with an FP task + * and this SPARC model has an FPU. + */ sparc_get_psr( tmp_psr ); - tmp_psr &= ~SPARC_PIL_MASK; - tmp_psr |= (((_new_level) << 8) & SPARC_PIL_MASK); - tmp_psr = (tmp_psr & ~0x07) | 0x07; /* XXX should use num windows */ - _the_context->psr = tmp_psr; + tmp_psr &= ~SPARC_PSR_PIL_MASK; + tmp_psr |= (new_level << 8) & SPARC_PSR_PIL_MASK; + tmp_psr &= ~SPARC_PSR_EF_MASK; /* disabled by default */ + +#if (SPARC_HAS_FPU == 1) + /* + * If this bit is not set, then a task gets a fault when it accesses + * a floating point register. This is a nice way to detect floating + * point tasks which are not currently declared as such. + */ + + if ( is_fp ) + tmp_psr |= SPARC_PSR_EF_MASK; +#endif + the_context->psr = tmp_psr; } /*PAGE * * _CPU_Internal_threads_Idle_thread_body * - * NOTES: - * - * 1. This is the same as the regular CPU independent algorithm. - * - * 2. If you implement this using a "halt", "idle", or "shutdown" - * instruction, then don't forget to put it in an infinite loop. + * Some SPARC implementations have low power, sleep, or idle modes. This + * tries to take advantage of those models. + */ + +#if (CPU_PROVIDES_IDLE_THREAD_BODY == TRUE) + +/* + * This is the implementation for the erc32. * - * 3. Be warned. Some processors with onboard DMA have been known - * to stop the DMA if the CPU were put in IDLE mode. This might - * also be a problem with other on-chip peripherals. So use this - * hook with caution. + * NOTE: Low power mode was enabled at initialization time. 
*/ +#if defined(erc32) + void _CPU_Internal_threads_Idle_thread_body( void ) { - - for( ; ; ) - /* insert your "halt" instruction here */ ; + while (1) { + ERC32_MEC.Power_Down = 0; /* value is irrelevant */ + } } + +#endif + +#endif /* CPU_PROVIDES_IDLE_THREAD_BODY */ diff --git a/cpukit/score/cpu/sparc/rtems/asm.h b/cpukit/score/cpu/sparc/rtems/asm.h index 10157171c5..a3d62416b8 100644 --- a/cpukit/score/cpu/sparc/rtems/asm.h +++ b/cpukit/score/cpu/sparc/rtems/asm.h @@ -28,7 +28,9 @@ */ #define ASM + #include +#include /* * Recent versions of GNU cpp define variables which indicate the @@ -37,7 +39,9 @@ * have to define these as appropriate. */ -/* XXX This does not appear to work on gcc 2.7.0 on the sparc */ +/* XXX __USER_LABEL_PREFIX__ and __REGISTER_PREFIX__ do not work on gcc 2.7.0 */ +/* XXX The following ifdef magic fixes the problem but results in a warning */ +/* XXX when compiling assembly code. */ #undef __USER_LABEL_PREFIX__ #ifndef __USER_LABEL_PREFIX__ #define __USER_LABEL_PREFIX__ _ @@ -91,6 +95,16 @@ #define PUBLIC(sym) .globl SYM (sym) #define EXTERN(sym) .globl SYM (sym) +/* + * Entry for traps which jump to a programmer-specified trap handler. + */ + +#define TRAP(_vector, _handler) \ + mov %psr, %l0 ; \ + sethi %hi(_handler), %l4 ; \ + jmp %l4+%lo(_handler); \ + mov _vector, %l3 + #endif /* end of include file */ diff --git a/cpukit/score/cpu/unix/cpu.c b/cpukit/score/cpu/unix/cpu.c index 347882ddcc..c77276020b 100644 --- a/cpukit/score/cpu/unix/cpu.c +++ b/cpukit/score/cpu/unix/cpu.c @@ -341,17 +341,10 @@ void _CPU_Install_interrupt_stack( void ) * * _CPU_Internal_threads_Idle_thread_body * - * NOTES: - * - * 1. This is the same as the regular CPU independent algorithm. - * - * 2. If you implement this using a "halt", "idle", or "shutdown" - * instruction, then don't forget to put it in an infinite loop. - * - * 3. Be warned. Some processors with onboard DMA have been known - * to stop the DMA if the CPU were put in IDLE mode. 
This might - * also be a problem with other on-chip peripherals. So use this - * hook with caution. + * Stop until we get a signal which is logically the same thing as + * entering low-power or sleep mode on a real processor and waiting for + * an interrupt. This significantly reduces the consumption of host + * CPU cycles which is again similar to low power mode. */ void _CPU_Internal_threads_Idle_thread_body( void ) @@ -370,7 +363,8 @@ void _CPU_Context_Initialize( unsigned32 *_stack_base, unsigned32 _size, unsigned32 _new_level, - void *_entry_point + void *_entry_point, + boolean _is_fp ) { void *source; @@ -697,49 +691,15 @@ void _CPU_Fatal_error(unsigned32 error) _exit(error); } -/*PAGE - * - * _CPU_ffs - */ - -int _CPU_ffs(unsigned32 value) -{ - int output; - extern int ffs( int ); - - output = ffs(value); - output = output - 1; - - return output; -} - - /* * Special Purpose Routines to hide the use of UNIX system calls. */ -#if 0 -/* XXX clock had this set of #define's */ - -/* - * In order to get the types and prototypes used in this file under - * Solaris 2.3, it is necessary to pull the following magic. - */ - -#if defined(solaris) -#warning "Ignore the undefining __STDC__ warning" -#undef __STDC__ -#define __STDC__ 0 -#undef _POSIX_C_SOURCE -#endif -#endif - int _CPU_Get_clock_vector( void ) { return SIGALRM; } - void _CPU_Start_clock( int microseconds ) -- cgit v1.2.3