Diffstat (limited to 'cpukit/score/cpu/or32/cpu_asm.c')
-rw-r--r-- cpukit/score/cpu/or32/cpu_asm.c | 432
1 file changed, 414 insertions(+), 18 deletions(-)
diff --git a/cpukit/score/cpu/or32/cpu_asm.c b/cpukit/score/cpu/or32/cpu_asm.c
index 2e7623fb69..0b149ad7b4 100644
--- a/cpukit/score/cpu/or32/cpu_asm.c
+++ b/cpukit/score/cpu/or32/cpu_asm.c
@@ -13,7 +13,9 @@
* found in the file LICENSE in this distribution or at
* http://www.OARcorp.com/rtems/license.html.
*
- * $Id$
+ * This file was adapted from the no_bsp board library of the RTEMS
+ * distribution. The body has been modified for the Bender Or1k
+ * implementation by Chris Ziomkowski <chris@asics.ws>.
*/
/*
@@ -38,15 +40,84 @@
* like a (Context_Control_fp *). The general rule on making this decision
* is to avoid writing assembly language.
*
- * OR32 Specific Information:
+ * Or1k Specific Information:
*
- * XXX document implementation including references if appropriate
+ * This implementation of RTEMS supports the "fast context
+ * switching" concept defined in the or1k architecture
+ * specification. Whether this makes a measurable difference in
+ * speed is dubious, but it is not a significant impediment to
+ * include it. It probably wastes a few cycles on every floating
+ * point context switch.
+ *
+ * This implementation will currently not work on a processor where
+ * the integer unit and floating point unit are not the same size. I
+ * am waiting on an architecture change to make this feasible. It
+ * should work fine on 64 bit architectures, except for the fact that
+ * the variables are declared as 32 bits. This shouldn't really make
+ * a difference, as the fact that they must be registers should force
+ * them into a 64 bit word anyway.
+ *
+ * The decision as to whether to do 32 or 64 bit saves is performed
+ * at run time based on the configuration of the CPUCFGR register. This
+ * takes a performance hit of a few cycles, but this should be a very
+ * small percentage of the total number of cycles necessary to do the
+ * save, and doesn't require special code for 32 or 64 bit versions.
+ *
+ * ADDITIONAL INFORMATION:
+ *
+ * It has been unanimously agreed that floating point will not be
+ * included in the initial releases of the Or1k chips, and that
+ * significant changes to the floating point architecture may
+ * occur before any such release will ever be implemented. The code
+ * below is therefore never called and never used.
*/
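The run-time decision described above amounts to reading CPUCFGR and testing a few bits. A minimal C sketch, assuming the bit masks used in the assembly below (0x380 = OF32S|OF64S|OV64S, 0x40 = OB64S), a hypothetical or1k_mfspr helper, and the RTEMS score type unsigned32:

    #define SPR_CPUCFGR     0x02   /* CPU configuration register */
    #define CPUCFGR_FP_MASK 0x380  /* OF32S | OF64S | OV64S */
    #define CPUCFGR_OB64S   0x40   /* ORBIS64 (64 bit) supported */

    static inline unsigned32 or1k_mfspr(unsigned32 spr)
    {
      unsigned32 value;
      asm volatile ("l.mfspr %0,%1,0" : "=r" (value) : "r" (spr));
      return value;
    }

    static int or1k_has_fp(void)
    {
      return (or1k_mfspr(SPR_CPUCFGR) & CPUCFGR_FP_MASK) != 0;
    }

    static int or1k_is_64bit(void)
    {
      return (or1k_mfspr(SPR_CPUCFGR) & CPUCFGR_OB64S) != 0;
    }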
void _CPU_Context_save_fp(
void **fp_context_ptr
)
{
+ register unsigned32 temp;
+ register unsigned32 address = (unsigned32)(*fp_context_ptr);
+ register unsigned32 xfer;
+ register unsigned32 loop;
+
+  /* %0 is a temporary register used for several values
+     throughout the code.  %3 contains the address at which
+     to save the context, and is advanced during the course
+     of the save.  %1 is a second scratch register used while
+     transferring each floating point value to memory.  %2 is
+     an end of loop marker, which is compared against the
+     counter in %0. */
+
+ asm volatile ("l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */
+ "l.andi %0,%0,0x380 \n\t" /* OF32S or OV64S or OF64S */
+ "l.sfnei %0,0x0 \n\t"
+ "l.bf _L_nofps \n\t" /* exit if no floating point */
+ "l.sfeqi %0,0x080 \n\t" /* (DELAY) single precision? */
+ "l.mfspr %0,r0,0x11 \n\t" /* Load Status Register */
+ "l.srli %0,%0,58 \n\t" /* Move CID into low byte*32 */
+ "l.bnf _L_spfp_loops \n\t" /* Branch on single precision */
+ "l.addi %2,%0,0x20 \n" /* Terminating condition */
+ /**** Double Precision Floating Point Section ****/
+ "_L_dpfp_loops: \n\t"
+ "l.mfspr %1,%0,0x600 \n\t" /* Load VFRx */
+ "l.sd 0(%3),%1 \n\t" /* Save VFRx */
+ "l.addi %0,%0,0x01 \n\t" /* Increment counter */
+ "l.sfeq %0,%2 \n\t" /* Branch if incomplete */
+ "l.bf _L_dpfp_loops \n\t"
+ "l.addi %3,%3,0x08 \n\t" /* (DELAY) update pointer */
+ "l.bnf _L_nofps \n\t" /* exit */
+ "l.nop \n"
+ /**** Single Precision Floating Point Section ****/
+ "_L_spfp_loops: \n\t"
+ "l.mfspr %1,%0,0x600 \n\t" /* Load VFRx */
+ "l.sw 0(%3),%1 \n\t" /* Save VFRx */
+ "l.addi %0,%0,0x01 \n\t" /* Increment counter */
+ "l.sfeq %0,%2 \n\t" /* Branch if incomplete */
+ "l.bf _L_spfp_loops \n\t"
+ "l.addi %3,%3,0x04 \n" /* (DELAY) update pointer */
+ "_L_nofps: \n\t" /* End of context save */
+ : "=&r" (temp), "=r" (xfer), "=&r" (loop), "+r" (address));
}
/*
@@ -56,27 +127,63 @@ void _CPU_Context_save_fp(
* at *fp_context_ptr. If the point to load the FP context
* from is changed then the pointer is modified by this routine.
*
- * Sometimes a macro implementation of this is in cpu.h which dereferences
- * the ** and a similarly named routine in this file is passed something
- * like a (Context_Control_fp *). The general rule on making this decision
- * is to avoid writing assembly language.
- *
- * OR32 Specific Information:
- *
- * XXX document implementation including references if appropriate
+ * Or1k Specific Information:
+ *
+ * The reverse of _CPU_Context_save_fp above.
*/
void _CPU_Context_restore_fp(
void **fp_context_ptr
)
{
+ register unsigned32 temp;
+ register unsigned32 address = (unsigned32)(*fp_context_ptr);
+ register unsigned32 xfer;
+ register unsigned32 loop;
+
+  /* The reverse of _CPU_Context_save_fp. */
+  /* %0 is a temporary register used for several values
+     throughout the code.  %1 contains the address from which
+     to restore the context, and is advanced during the course
+     of the restore.  %2 is a second scratch register used while
+     transferring each floating point value from memory.  %3 is
+     an end of loop marker, which is compared against the
+     counter in %0. */
+
+ asm volatile ("l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */
+ "l.andi %0,%0,0x380 \n\t" /* OF32S or OV64S or OF64S */
+ "l.sfnei %0,0x0 \n\t"
+ "l.bf _L_nofpr \n\t" /* exit if no floating point */
+ "l.sfeqi %0,0x080 \n\t" /* (DELAY) single precision? */
+ "l.mfspr %0,r0,0x11 \n\t" /* Load Status Register */
+ "l.srli %0,%0,58 \n\t" /* Move CID into low byte*32 */
+ "l.bnf _L_spfp_loopr \n\t" /* Branch on single precision */
+ "l.addi %3,%0,0x20 \n" /* Terminating condition */
+ /**** Double Precision Floating Point Section ****/
+ "_L_dpfp_loopr: \n\t"
+ "l.mfspr %2,%0,0x600 \n\t" /* Load VFRx */
+ "l.sd 0(%1),%2 \n\t" /* Save VFRx */
+ "l.addi %0,%0,0x01 \n\t" /* Increment counter */
+ "l.sfeq %0,%3 \n\t" /* Branch if incomplete */
+ "l.bf _L_dpfp_loopr \n\t"
+ "l.addi %1,%1,0x08 \n\t" /* (DELAY) update pointer */
+ "l.bnf _L_nofpr \n\t" /* exit */
+ "l.nop \n"
+ /**** Single Precision Floating Point Section ****/
+ "_L_spfp_loopr: \n\t"
+ "l.mfspr %2,%0,0x600 \n\t" /* Load VFRx */
+ "l.sw 0(%1),%2 \n\t" /* Save VFRx */
+ "l.addi %0,%0,0x01 \n\t" /* Increment counter */
+ "l.sfeq %0,%3 \n\t" /* Branch if incomplete */
+ "l.bf _L_spfp_loopr \n\t"
+ "l.addi %1,%1,0x04 \n" /* (DELAY) update pointer */
+ "_L_nofpr: \n\t" /* End of context save */
+ : "=&r" (temp), "+r" (address), "=r" (xfer), "=&r" (loop));
}
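Taken together, the two routines receive a double pointer so that the context location can be inspected and redirected by the caller. A hedged usage sketch (the buffer size assumes the 32 vector/float registers at double precision; all names are illustrative):

    static double fp_area[32];   /* room for VFR0..VFR31, double precision */

    void fp_context_example(void)
    {
      void *fp_context = fp_area;

      _CPU_Context_save_fp(&fp_context);     /* dump VFR0..VFR31 to fp_area */
      /* ... another thread runs and clobbers the FP registers ... */
      fp_context = fp_area;                  /* rewind before restoring */
      _CPU_Context_restore_fp(&fp_context);  /* reload VFR0..VFR31 */
    }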
/* _CPU_Context_switch
*
* This routine performs a normal non-FP context switch.
*
- * OR32 Specific Information:
+ * Or1k Specific Information:
*
* XXX document implementation including references if appropriate
*/
@@ -86,6 +193,278 @@ void _CPU_Context_switch(
Context_Control *heir
)
{
+ register unsigned32 temp1 = 0;
+ register unsigned32 temp2 = 0;
+
+  /* This function is really tricky. When it is called, we save
+     the state we need and then grab the new state from the heir
+     pointer. We then do a longjmp-style transfer to that code,
+     replacing the current stack pointer with the new environment.
+     This function never returns to its caller directly. Instead, at
+     some later time, some other thread will call context switch with
+     our pointer in the heir variable, and control will jump back
+     to us. We will then continue. Let's see how this works... */
+
+ /* Technically, we could probably not worry about saving r3
+ and r4, since these are parameters guaranteed to be saved
+ by the calling function. We could also probably get away
+ without saving r11, as that is filled in by the return
+ statement. But as a first cut I'm in favor of just saving
+ everything.... */
+
+ /* We could be more efficient and use compile time directives
+ for 32 or 64 bit, but this will allow the code to run on
+ everything without modification. Feel free to comment the
+ relevant sections out if you don't need it. */
+
+  /* We should probably write this whole routine in assembly
+     so that we can have separate entry points for self restore
+     or context switch. You can't jump to local labels from
+     inline assembly across function calls, and I don't feel
+     like embedding all the .global directives here...it really
+     screws up the debugger. Oh well, what do 2 more instructions
+     and a branch really cost... */
+
+ /* One thing which we should do is check for 32 or 64 bit models
+ first, and then do one branch to the appropriate code section.
+ Currently, we check the architecture bit in CPUCFGR twice. Once
+ during the load section and again during restore. That is inefficient,
+ and considering this code is huge anyway, saving the few bytes
+ simply doesn't make any practical sense. FIX THIS LATER. */
+
+ /* Note that this routine assumes software context switches are
+ done with the same CID. In other words, it will not manage
+ the CIDs and assign a new one as necessary. If you tell it
+ to restore a context at CID 2, and the current one is at CID
+ 4, it will do what it is told. It will overwrite the registers
+ for context ID 2, meaning they are irretrievably lost. I hope
+ you saved them earlier.... */
+
+  /* Note that you can have a context jump anywhere you want,
+     although by default we will jump to the _L_restore label. If
+     you modify the resume location in the Context_Control structure
+     (see the inferred layout sketch after this function), execution
+     will continue wherever you told it to go. Note, however, that
+     you had better also have cleaned up the stack and frame
+     pointers, because they are probably still set with the values
+     obtained from entering this function... */
+
+ asm volatile ("l.sfeqi %3,0x0 \n\t" /* Is this a self restore? */
+ "l.bf _L_restore \n\t" /* Yes it is...go there */
+ "l.nop \n\t"
+
+ "l.lwz %0,0(%3) \n\t" /* Prefetch new context */
+ "l.mfspr %2,r0,0x11 \n\t" /* Status Register */
+ "l.sw 0(%1),%2 \n\t" /* Save it */
+ "l.srli %2,%2,28 \n\t" /* Move CID into low byte */
+ "l.mfspr %0,%2,0x20 \n\t" /* Offset from EPCR */
+ "l.sw 4(%1),%0 \n\t" /* Store it */
+ "l.mfspr %0,%2,0x30 \n\t" /* Offset from EEAR */
+ "l.sw 8(%1),%0 \n\t" /* Store it */
+ "l.mfspr %0,%2,0x40 \n\t" /* Offset from ESR */
+ "l.sw 12(%1),%0 \n\t" /* Store it */
+ "l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */
+ "l.andi %0,%0,0x40 \n\t" /* OB64S */
+ "l.sfnei %0,0x0 \n\t"
+ "l.bf _L_64bit \n\t" /* 64 bit architecture */
+ "l.movhi %0,hi(_L_restore)\n\t"
+
+ /**** 32 bit implementation ****/
+ "l.ori %0,%0,lo(_L_restore)\n\t"
+ "l.sw 140(%1),%0 \n\t" /* Save the PC */
+ "l.lwz %0,140(%3) \n\t" /* New PC. Expect cache miss */
+ "l.sw 16(%1),r1 \n\t"
+ "l.sw 20(%1),r2 \n\t"
+ "l.sw 24(%1),r3 \n\t"
+ "l.sw 28(%1),r4 \n\t"
+ "l.sw 32(%1),r5 \n\t"
+ "l.sw 36(%1),r6 \n\t"
+ "l.sw 40(%1),r7 \n\t"
+ "l.sw 44(%1),r8 \n\t"
+ "l.sw 48(%1),r9 \n\t"
+ "l.sw 52(%1),r10 \n\t"
+ "l.sw 56(%1),r11 \n\t"
+ "l.sw 60(%1),r12 \n\t"
+ "l.sw 64(%1),r13 \n\t"
+ "l.sw 68(%1),r14 \n\t"
+ "l.sw 72(%1),r15 \n\t"
+ "l.sw 76(%1),r16 \n\t"
+ "l.sw 80(%1),r17 \n\t"
+ "l.sw 84(%1),r18 \n\t"
+ "l.sw 88(%1),r19 \n\t"
+ "l.sw 92(%1),r20 \n\t"
+ "l.sw 96(%1),r21 \n\t"
+ "l.sw 100(%1),r22 \n\t"
+ "l.sw 104(%1),r23 \n\t"
+ "l.sw 108(%1),r24 \n\t"
+ "l.sw 112(%1),r25 \n\t"
+ "l.sw 116(%1),r26 \n\t"
+ "l.sw 120(%1),r27 \n\t"
+ "l.sw 124(%1),r28 \n\t"
+ "l.sw 128(%1),r29 \n\t"
+ "l.sw 132(%1),r30 \n\t"
+ "l.jr %0 \n\t" /* Go there */
+ "l.sw 136(%1),r31 \n" /* Store the last reg */
+
+ /**** 64 bit implementation ****/
+ "_L_64bit: \n\t"
+ "l.ori %0,%0,lo(_L_restore)\n\t"
+ "l.sw 264(%1),%0 \n\t"
+ "l.sd 16(%1),r1 \n\t"
+ "l.sd 24(%1),r2 \n\t"
+ "l.sd 32(%1),r3 \n\t"
+ "l.sd 40(%1),r4 \n\t"
+ "l.sd 48(%1),r5 \n\t"
+ "l.sd 56(%1),r6 \n\t"
+ "l.sd 64(%1),r7 \n\t"
+ "l.sd 72(%1),r8 \n\t"
+ "l.sd 80(%1),r9 \n\t"
+ "l.sd 88(%1),r10 \n\t"
+ "l.sd 96(%1),r11 \n\t"
+ "l.sd 104(%1),r12 \n\t"
+ "l.sd 112(%1),r13 \n\t"
+ "l.sd 120(%1),r14 \n\t"
+ "l.sd 128(%1),r15 \n\t"
+ "l.sd 136(%1),r16 \n\t"
+ "l.sd 144(%1),r17 \n\t"
+ "l.sd 152(%1),r18 \n\t"
+ "l.sd 160(%1),r19 \n\t"
+ "l.sd 168(%1),r20 \n\t"
+ "l.sd 176(%1),r21 \n\t"
+ "l.sd 184(%1),r22 \n\t"
+ "l.sd 192(%1),r23 \n\t"
+ "l.sd 200(%1),r24 \n\t"
+ "l.sd 208(%1),r25 \n\t"
+ "l.sd 216(%1),r26 \n\t"
+ "l.sd 224(%1),r27 \n\t"
+ "l.sd 232(%1),r28 \n\t"
+ "l.sd 240(%1),r29 \n\t"
+ "l.sd 248(%1),r30 \n\t"
+ "l.jr %0 \n\t" /* Go to the new PC */
+ "l.sd 256(%1),r31 \n" /* Store the last reg */
+
+ /**** The restoration routine. ****/
+
+ /* Note that when we return from this function,
+ we will actually be returning to a different
+ context than when we left. The debugger might
+ have conniptions over this, but we'll have to
+ reengineer that later. The stack and status
+ registers will all be changed, however we
+ will not touch the global interrupt mask. */
+
+ /* Also note, when doing any restore, the most
+ important registers are r1, r2, and r9. These
+ will be accessed immediately upon exiting the
+ routine, and so we want to make sure we load
+ them as early as possible in case they are
+ not in cache */
+
+ "_L_restore: \n\t" /* Restore "heir" */
+ "l.mfspr %2,r0,0x11 \n\t" /* Status Register */
+ "l.movhi %0,0x07FF \n\t" /* ~SR mask */
+ "l.ori %0,%0,0xD1FF \n\t"
+ "l.and %2,%0,%2 \n\t" /* save the global bits */
+ "l.movhi %0,0xF800 \n\t" /* SR mask */
+ "l.ori %0,%0,0x2E00 \n\t"
+ "l.lwz %1,0(%3) \n\t" /* Get the previous SR */
+ "l.and %0,%1,%0 \n\t" /* Mask out the global bits */
+ "l.or %2,%2,%0 \n\t" /* Combine local/global */
+ "l.mtspr r0,%2,0x11 \n\t" /* Restore the status register */
+
+ "l.mfspr %0,r0,0x02 \n\t" /* CPUCFGR */
+ "l.andi %0,%0,0x40 \n\t" /* OB64S */
+ "l.sfnei %0,0x0 \n\t" /* Save the 64 bit flag */
+
+ "l.srli %2,%2,28 \n\t" /* Move CID into low byte */
+ "l.lwz %0,4(%3) \n\t"
+ "l.mtspr %2,%0,0x20 \n\t" /* Offset from EPCR */
+ "l.lwz %0,8(%3) \n\t"
+ "l.mtspr %2,%0,0x30 \n\t" /* Offset from EEAR */
+ "l.lwz %0,12(%3) \n\t"
+
+ "l.bf _L_r64bit \n\t" /* 64 bit architecture */
+ "l.mtspr %2,%0,0x30 \n\t" /* Offset from EEAR (DELAY) */
+
+ /**** 32 bit restore ****/
+ "l.lwz r1,16(%3) \n\t"
+ "l.lwz r2,20(%3) \n\t"
+ "l.lwz r9,48(%3) \n\t"
+ "l.lwz r3,24(%3) \n\t"
+ "l.lwz r4,28(%3) \n\t"
+ "l.lwz r5,32(%3) \n\t"
+ "l.lwz r6,36(%3) \n\t"
+ "l.lwz r7,40(%3) \n\t"
+ "l.lwz r8,44(%3) \n\t"
+ "l.lwz r10,52(%3) \n\t"
+ "l.lwz r11,56(%3) \n\t"
+ "l.lwz r12,60(%3) \n\t"
+ "l.lwz r13,64(%3) \n\t"
+ "l.lwz r14,68(%3) \n\t"
+ "l.lwz r15,72(%3) \n\t"
+ "l.lwz r16,76(%3) \n\t"
+ "l.lwz r17,80(%3) \n\t"
+ "l.lwz r18,84(%3) \n\t"
+ "l.lwz r19,88(%3) \n\t"
+ "l.lwz r20,92(%3) \n\t"
+ "l.lwz r21,96(%3) \n\t"
+ "l.lwz r22,100(%3) \n\t"
+ "l.lwz r23,104(%3) \n\t"
+ "l.lwz r24,108(%3) \n\t"
+ "l.lwz r25,112(%3) \n\t"
+ "l.lwz r26,116(%3) \n\t"
+ "l.lwz r27,120(%3) \n\t"
+ "l.lwz r28,124(%3) \n\t"
+ "l.lwz r29,128(%3) \n\t"
+ "l.lwz r30,132(%3) \n\t"
+ "l.j _L_return \n\t"
+ "l.lwz r31,136(%3) \n"
+
+ /**** 64 bit restore ****/
+ "_L_r64bit: \n\t"
+ "l.ld r1,16(%3) \n\t"
+ "l.ld r2,24(%3) \n\t"
+ "l.ld r9,80(%3) \n\t"
+ "l.ld r3,32(%3) \n\t"
+ "l.ld r4,40(%3) \n\t"
+ "l.ld r5,48(%3) \n\t"
+ "l.ld r6,56(%3) \n\t"
+ "l.ld r7,64(%3) \n\t"
+ "l.ld r8,72(%3) \n\t"
+ "l.ld r10,88(%3) \n\t"
+ "l.ld r11,96(%3) \n\t"
+ "l.ld r12,104(%3) \n\t"
+ "l.ld r13,112(%3) \n\t"
+ "l.ld r14,120(%3) \n\t"
+ "l.ld r15,128(%3) \n\t"
+ "l.ld r16,136(%3) \n\t"
+ "l.ld r17,144(%3) \n\t"
+ "l.ld r18,152(%3) \n\t"
+ "l.ld r19,160(%3) \n\t"
+ "l.ld r20,168(%3) \n\t"
+ "l.ld r21,176(%3) \n\t"
+ "l.ld r22,184(%3) \n\t"
+ "l.ld r23,192(%3) \n\t"
+ "l.ld r24,200(%3) \n\t"
+ "l.ld r25,208(%3) \n\t"
+ "l.ld r26,216(%3) \n\t"
+ "l.ld r27,224(%3) \n\t"
+ "l.ld r28,232(%3) \n\t"
+ "l.ld r29,240(%3) \n\t"
+ "l.ld r30,248(%3) \n\t"
+ "l.ld r31,256(%3) \n"
+
+ "_L_return: \n\t" /* End of routine */
+
+ : "=&r" (temp1), "+r" (run), "=&r" (temp2)
+ : "r" (heir));
+
+  /* Note that some registers were used for parameter passing and
+     as temporaries (temp1 and temp2). These values were saved and
+     restored across context calls, but the values the caller needs
+     should have been stored on the stack. The C code should now
+     restore those from the stack, since r1 and r2 have been
+     restored, and return to the location specified by r9. Then,
+     all should be happy in the world. */
}
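For reference, the 32 bit offsets used above (0 = SR, 4 = EPCR, 8 = EEAR, 12 = ESR, 16..136 = r1..r31, 140 = resume PC) imply a Context_Control laid out along the following lines. This is inferred from the assembly, not taken from the port's header, and the field names are illustrative:

    typedef struct {
      unsigned32 sr;       /* offset   0: status register */
      unsigned32 epcr;     /* offset   4: exception PC */
      unsigned32 eear;     /* offset   8: exception effective address */
      unsigned32 esr;      /* offset  12: exception status register */
      unsigned32 gpr[31];  /* offset  16: r1..r31 */
      unsigned32 pc;       /* offset 140: where _L_restore resumes */
    } or1k_context_sketch; /* 32 bit layout; the 64 bit path doubles the GPR slots */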
/*
@@ -96,27 +475,31 @@ void _CPU_Context_switch(
*
* NOTE: May be unnecessary to reload some registers.
*
- * OR32 Specific Information:
+ * Or1k Specific Information:
*
- * XXX document implementation including references if appropriate
+ * In our implementation, this simply redirects to _CPU_Context_switch.
*/
void _CPU_Context_restore(
- Context_Control *new_context
+ Context_Control *run
)
{
+   _CPU_Context_switch(NULL,run); /* run == NULL tells the asm to skip the save */
}
+
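Because the restore path never returns, a hypothetical caller that launches the first thread would look like:

    void start_first_thread(Context_Control *first)
    {
      _CPU_Context_restore(first);  /* jumps into the heir context */
      /* never reached */
    }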
/* void __ISR_Handler()
*
* This routine provides the RTEMS interrupt management.
*
- * OR32 Specific Information:
+ * Or1k Specific Information:
*
- * XXX document implementation including references if appropriate
+ * Based on the Or1k interrupt architecture described in chapter 16,
+ * and the exception architecture described in chapter 9, of the or1k
+ * architecture specification.
*/
-void _ISR_Handler()
+void _ISR_Handler(unsigned32 vector, unsigned32 ProgramCounter,
+                  unsigned32 EffectiveAddress, unsigned32 StatusRegister)
{
/*
* This discussion ignores a lot of the ugly details in a real
@@ -179,5 +562,18 @@ void _ISR_Handler()
* prepare to get out of interrupt
* return from interrupt
*/
+
+ /* In the Or1k architecture, exceptions are handled in the
+ startup code of the board support package. Thus, this
+ routine is never called. Or1k exception routines are called
+ with the following prototype:
+
+ function(int vector#, int PC, int Address, int StatusRegister);
+
+ These parameters are snapshots of the system when the exception
+ was encountered. If virtual memory is active, things like the
+ PC and Address may have little meaning, as they are referenced
+ in physical space, not the virtual space of the process.
+ */
}
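A hedged sketch of a handler matching that prototype; the function name is illustrative and the registration mechanism (in the BSP startup code) is not shown:

    /* Illustrative or1k exception handler; _ISR_Handler above is
       never called in this port. */
    void example_exception_handler(
      unsigned32 vector,           /* exception vector number */
      unsigned32 pc,               /* PC when the exception hit */
      unsigned32 address,          /* faulting effective address */
      unsigned32 status_register   /* SR snapshot */
    )
    {
      /* If address translation is active, pc and address are
         physical, so translate them before comparing against
         process (virtual) addresses. */
      (void) vector;
      (void) pc;
      (void) address;
      (void) status_register;
    }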