summaryrefslogtreecommitdiffstats
path: root/cpukit/score/cpu/or32/cpu_asm.c
blob: 5580ba3115e30e4ff77e22041a929af9a29a8eaa (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
/*  cpu_asm.c  ===> cpu_asm.S or cpu_asm.s
 *
 *  This file contains the basic algorithms for all assembly code used
 *  in a specific CPU port of RTEMS.  These algorithms must be implemented
 *  in assembly language.
 *
 *  NOTE:  This is supposed to be a .S or .s file NOT a C file.
 *
 *  COPYRIGHT (c) 1989-1999.
 *  On-Line Applications Research Corporation (OAR).
 *
 *  The license and distribution terms for this file may be
 *  found in the file LICENSE in this distribution or at
 *  http://www.rtems.com/license/LICENSE.
 *
 *  This file adapted from no_bsp board library of the RTEMS distribution.
 *  The body has been modified for the Bender Or1k implementation by
 *  Chris Ziomkowski. <chris@asics.ws>
 */

/*
 *  This is supposed to be an assembly file.  This means that system.h
 *  and cpu.h should not be included in a "real" cpu_asm file.  An
 *  implementation in assembly should include "cpu_asm.h".
 */

#include <rtems/system.h>
#include <rtems/score/cpu.h>
/* #include "cpu_asm.h" */

/*
 *  _CPU_Context_save_fp_context
 *
 *  This routine is responsible for saving the FP context
 *  at *fp_context_ptr.  If the pointer to load the FP context
 *  from is changed then the pointer is modified by this routine.
 *
 *  Sometimes a macro implementation of this is in cpu.h which dereferences
 *  the ** and a similarly named routine in this file is passed something
 *  like a (Context_Control_fp *).  The general rule on making this decision
 *  is to avoid writing assembly language.
 *
 *  or1k specific Information:
 *
 *  This implementation of RTEMS considers the concept of
 *  "fast context switching", as defined in the or1k architecture
 *  specification. Whether or not this makes a significant
 *  impact on speed is dubious, however it is not a significant
 *  impediment to include it. It probably wastes a few cycles on
 *  every floating point context switch.
 *
 *  This implementation will currently not work on a processor where
 *  the integer unit and floating point unit are not the same size. I
 *  am waiting on an architecture change to make this feasible. It
 *  should work fine on 64 bit architectures, except for the fact that
 *  the variables are declared as 32 bits. This shouldn't really make
 *  a difference, as the fact that they must be registers should force
 *  them into a 64 bit word anyway.
 *
 *  The decision as to whether to do 32 or 64 bit saves is performed
 *  at run time based on the configuration of the CPUCFGR register. This
 *  takes a performance hit of a few cycles, but this should be a very
 *  small percentage of the total number of cycles necessary to do the
 *  save, and doesn't require special code for 32 or 64 bit versions.
 *
 *  ADDITIONAL INFORMATION:
 *
 *  It has been unanimously agreed that floating point will not be
 *  included in the initial releases of the Or1k chips, and that
 *  significant changes to the floating point architecture may
 *  occur before any such release will ever be implemented. The code
 *  below is therefore never called and never used.
 */

/*
 *  Save the floating point context (the VFRx register bank selected by
 *  the CID field of the Status Register) into the memory block pointed
 *  to by *fp_context_ptr.  The element width (32 or 64 bit) is chosen
 *  at run time from CPUCFGR.  Per the file header, no Or1k silicon
 *  implements an FPU, so this routine is currently never executed.
 *
 *  FIX: the original code had every l.bf/l.bnf pair inverted relative
 *  to the l.sf* flag semantics (l.bf branches when the flag is SET),
 *  so the "no FPU" test exited when an FPU was present and each save
 *  loop terminated after a single element.  The branch senses below
 *  now match the l.sf* conditions (and the polarity used by
 *  _CPU_Context_switch in this file).
 */
void _CPU_Context_save_fp(
  void **fp_context_ptr
)
{
  register uint32_t   temp;     /* scratch: CPUCFGR/SR bits, then VFR index */
  register uint32_t   address = (uint32_t  )(*fp_context_ptr);  /* save area */
  register uint32_t   xfer;     /* staging register for each VFR value */
  register uint32_t   loop;     /* one-past-the-last VFR index */

  /* %0 is a temporary register which is used for several
     values throughout the code. %3 contains the address
     to save the context, and is modified during the course
     of the context save. %1 is a second dummy register
     which is used during transfer of the floating point
     value to memory. %2 is an end of loop marker which
     is compared against the counter in %0. */

  /* NOTE(review): the srli by 58 is only meaningful for a 64-bit
     Status Register; a 32-bit SR would need a different shift to
     form CID*32 -- confirm against the target SR layout.
     NOTE(review): the single/double selection (l.sfeqi %0,0x080 then
     l.bf) assumes 0x080 (OF32S) alone means "single precision only";
     confirm against the intended CPUCFGR encoding. */

  asm volatile ("l.mfspr  %0,r0,0x02    \n\t"  /* CPUCFGR */
	       "l.andi   %0,%0,0x380   \n\t"  /* OF32S or OV64S or OF64S */
	       "l.sfnei  %0,0x0        \n\t"
	       "l.bnf    _L_nofps      \n\t"  /* exit if no floating point  */
	       "l.sfeqi  %0,0x080      \n\t"  /* (DELAY) single precision?  */
	       "l.mfspr  %0,r0,0x11    \n\t"  /* Load Status Register       */
	       "l.srli   %0,%0,58      \n\t"  /* Move CID into low byte*32  */
	       "l.bf     _L_spfp_loops \n\t"  /* Branch on single precision */
	       "l.addi   %2,%0,0x20    \n"    /* Terminating condition      */
	       /**** Double Precision Floating Point Section ****/
	       "_L_dpfp_loops:         \n\t"
	       "l.mfspr  %1,%0,0x600   \n\t"  /* Load VFRx                  */
	       "l.sd     0(%3),%1      \n\t"  /* Save VFRx                  */
	       "l.addi   %0,%0,0x01    \n\t"  /* Increment counter          */
	       "l.sfeq   %0,%2         \n\t"  /* Flag set when complete     */
	       "l.bnf    _L_dpfp_loops \n\t"  /* Loop while incomplete      */
	       "l.addi   %3,%3,0x08    \n\t"  /* (DELAY) update pointer     */
	       "l.bf     _L_nofps      \n\t"  /* exit                       */
	       "l.nop                  \n"
	       /**** Single Precision Floating Point Section ****/
	       "_L_spfp_loops:         \n\t"
	       "l.mfspr  %1,%0,0x600   \n\t"  /* Load VFRx                  */
	       "l.sw     0(%3),%1      \n\t"  /* Save VFRx                  */
	       "l.addi   %0,%0,0x01    \n\t"  /* Increment counter          */
	       "l.sfeq   %0,%2         \n\t"  /* Flag set when complete     */
	       "l.bnf    _L_spfp_loops \n\t"  /* Loop while incomplete      */
	       "l.addi   %3,%3,0x04    \n"    /* (DELAY) update pointer     */
	       "_L_nofps:              \n\t"  /* End of context save        */
		: "=&r" (temp), "=r" (xfer), "=&r" (loop), "+r" (address));
}

/*
 *  _CPU_Context_restore_fp_context
 *
 *  This routine is responsible for restoring the FP context
 *  at *fp_context_ptr.  If the pointer to load the FP context
 *  from is changed then the pointer is modified by this routine.
 *
 *  
 */

/*
 *  Restore the floating point context (the VFRx register bank selected
 *  by the CID field of the Status Register) from the memory block
 *  pointed to by *fp_context_ptr.  Element width (32 or 64 bit) is
 *  chosen at run time from CPUCFGR.  Per the file header, no Or1k
 *  silicon implements an FPU, so this routine is never executed.
 *
 *  FIX 1: the original body was a copy of the *save* routine -- it read
 *  each VFRx with l.mfspr and stored it to memory (l.sd/l.sw), its own
 *  comments even saying "Save VFRx".  A restore must do the opposite:
 *  load the saved image from memory (l.ld/l.lwz) and write it back to
 *  the VFRx SPR with l.mtspr.
 *
 *  FIX 2: as in _CPU_Context_save_fp, every l.bf/l.bnf pair was
 *  inverted relative to the l.sf* flag semantics; the branch senses
 *  below now match the conditions.
 */
void _CPU_Context_restore_fp(
  void **fp_context_ptr
)
{
  register uint32_t   temp;     /* scratch: CPUCFGR/SR bits, then VFR index */
  register uint32_t   address = (uint32_t  )(*fp_context_ptr);  /* saved image */
  register uint32_t   xfer;     /* staging register for each VFR value */
  register uint32_t   loop;     /* one-past-the-last VFR index */

  /* The reverse of _CPU_Context_save_fp */
  /* %0 is a temporary register which is used for several
     values throughout the code. %1 contains the address
     to restore the context from, and is modified during the
     course of the restore. %2 is a second dummy register
     which is used during transfer of the floating point
     value from memory. %3 is an end of loop marker which
     is compared against the counter in %0. */

  asm volatile ("l.mfspr  %0,r0,0x02    \n\t"  /* CPUCFGR */
	       "l.andi   %0,%0,0x380   \n\t"  /* OF32S or OV64S or OF64S */
	       "l.sfnei  %0,0x0        \n\t"
	       "l.bnf    _L_nofpr      \n\t"  /* exit if no floating point  */
	       "l.sfeqi  %0,0x080      \n\t"  /* (DELAY) single precision?  */
	       "l.mfspr  %0,r0,0x11    \n\t"  /* Load Status Register       */
	       "l.srli   %0,%0,58      \n\t"  /* Move CID into low byte*32  */
	       "l.bf     _L_spfp_loopr \n\t"  /* Branch on single precision */
	       "l.addi   %3,%0,0x20    \n"    /* Terminating condition      */
	       /**** Double Precision Floating Point Section ****/
	       "_L_dpfp_loopr:         \n\t"
	       "l.ld     %2,0(%1)      \n\t"  /* Fetch saved VFRx image     */
	       "l.mtspr  %0,%2,0x600   \n\t"  /* Restore VFRx               */
	       "l.addi   %0,%0,0x01    \n\t"  /* Increment counter          */
	       "l.sfeq   %0,%3         \n\t"  /* Flag set when complete     */
	       "l.bnf    _L_dpfp_loopr \n\t"  /* Loop while incomplete      */
	       "l.addi   %1,%1,0x08    \n\t"  /* (DELAY) update pointer     */
	       "l.bf     _L_nofpr      \n\t"  /* exit                       */
	       "l.nop                  \n"
	       /**** Single Precision Floating Point Section ****/
	       "_L_spfp_loopr:         \n\t"
	       "l.lwz    %2,0(%1)      \n\t"  /* Fetch saved VFRx image     */
	       "l.mtspr  %0,%2,0x600   \n\t"  /* Restore VFRx               */
	       "l.addi   %0,%0,0x01    \n\t"  /* Increment counter          */
	       "l.sfeq   %0,%3         \n\t"  /* Flag set when complete     */
	       "l.bnf    _L_spfp_loopr \n\t"  /* Loop while incomplete      */
	       "l.addi   %1,%1,0x04    \n"    /* (DELAY) update pointer     */
	       "_L_nofpr:              \n\t"  /* End of context restore     */
	       : "=&r" (temp), "+r" (address), "=r" (xfer), "=&r" (loop));
}

/*  _CPU_Context_switch
 *
 *  This routine performs a normal non-FP context switch.
 *
 *  NO_CPU Specific Information:
 *
 *  XXX document implementation including references if appropriate
 */

/*
 *  Save the integer context of "run" and resume the context of "heir".
 *  The per-context SPRs (SR, EPCR, EEAR, ESR for the current CID) and
 *  r1-r31 are saved at 0..140 (32-bit) or 0..264 (64-bit) of the run
 *  context, then control transfers to the heir's saved PC.  This
 *  function returns into the *heir's* stack frame via r9.
 *
 *  FIX: the restore path previously wrote the saved ESR word (offset
 *  12 of the context) back through SPR group offset 0x30 (EEAR) a
 *  second time; the save path stores ESR via offset 0x40, so the
 *  delay-slot l.mtspr now uses 0x40.
 */
void _CPU_Context_switch(
  Context_Control  *run,
  Context_Control  *heir
)
{
  register uint32_t   temp1 = 0;
  register uint32_t   temp2 = 0;

  /* This function is really tricky. When this function is called,
     we should save our state as we need it, and then grab the
     new state from the pointer. We then do a longjump to this
     code, replacing the current stack pointer with the new
     environment. This function never returns. Instead, at some
     later time, another person will call context switch with
     our pointer in the heir variable, and they will longjump
     to us. We will then continue. Let's see how this works... */

  /* Technically, we could probably not worry about saving r3
     and r4, since these are parameters guaranteed to be saved
     by the calling function. We could also probably get away
     without saving r11, as that is filled in by the return
     statement. But as a first cut I'm in favor of just saving
     everything.... */

  /* We could be more efficient and use compile time directives
     for 32 or 64 bit, but this will allow the code to run on
     everything without modification. Feel free to comment the
     relevant sections out if you don't need it. */

  /* We should probably write this whole routine in assembly
     so that we can have separate entry points for self restore
     or context switch. You can't jump to local labels from
     inline assembly across function calls, and I don't feel
     like embedding all the .global directives here...it really
     screws up the debugger. Oh well, what's 2 more instructions
     and a branch really cost... */

  /* One thing which we should do is check for 32 or 64 bit models
     first, and then do one branch to the appropriate code section.
     Currently, we check the architecture bit in CPUCFGR twice. Once
     during the load section and again during restore. That is inefficient,
     and considering this code is huge anyway, saving the few bytes
     simply doesn't make any practical sense. FIX THIS LATER. */

  /* Note that this routine assumes software context switches are
     done with the same CID. In other words, it will not manage
     the CIDs and assign a new one as necessary. If you tell it
     to restore a context at CID 2, and the current one is at CID
     4, it will do what it is told. It will overwrite the registers
     for context ID 2, meaning they are irretrievably lost. I hope
     you saved them earlier.... */

  /* Note that you can have a context jump anywhere you want, although
     by default we will jump to the L_restore label. If you then modify
     the location in the Context_Control structure, it will continue
     wherever you told it to go. Note however that you had better
     also have cleaned up the stack and frame pointers though, because
     they are probably still set with the values obtained from
     entering this function... */

  /* NOTE(review): the "self restore" short-circuit below jumps to
     _L_restore when heir (%3) is NULL, yet _L_restore restores *from*
     %3 -- a NULL heir would be dereferenced.  _CPU_Context_restore()
     passes heir == NULL, so the self-restore path looks broken as a
     whole; confirm the intended calling convention before relying on
     it. */

  asm volatile ("l.sfeqi   %3,0x0        \n\t"  /* Is this a self restore? */
	       "l.bf     _L_restore    \n\t"  /* Yes it is...go there */
	       "l.nop                  \n\t"

	       "l.lwz    %0,0(%3)      \n\t"  /* Prefetch new context */
               "l.mfspr  %2,r0,0x11    \n\t"  /* Status Register */
	       "l.sw     0(%1),%2      \n\t"  /* Save it */
	       "l.srli   %2,%2,28      \n\t"  /* Move CID into low byte */
	       "l.mfspr  %0,%2,0x20    \n\t"  /* Offset from EPCR */
	       "l.sw     4(%1),%0      \n\t"  /* Store it */
	       "l.mfspr  %0,%2,0x30    \n\t"  /* Offset from EEAR */
	       "l.sw     8(%1),%0      \n\t"  /* Store it */
	       "l.mfspr  %0,%2,0x40    \n\t"  /* Offset from ESR */
	       "l.sw     12(%1),%0     \n\t"  /* Store it */
               "l.mfspr  %0,r0,0x02    \n\t"  /* CPUCFGR */
	       "l.andi   %0,%0,0x40    \n\t"  /* OB64S */
	       "l.sfnei  %0,0x0        \n\t"
	       "l.bf     _L_64bit      \n\t"  /* 64 bit architecture */
	       "l.movhi  %0,hi(_L_restore)\n\t"

	       /****  32 bit implementation  ****/
	       "l.ori    %0,%0,lo(_L_restore)\n\t"
	       "l.sw     140(%1),%0    \n\t"   /* Save the PC */
	       "l.lwz    %0,140(%3)    \n\t"   /* New PC. Expect cache miss */
	       "l.sw     16(%1),r1     \n\t"
	       "l.sw     20(%1),r2     \n\t"
	       "l.sw     24(%1),r3     \n\t"
	       "l.sw     28(%1),r4     \n\t"
	       "l.sw     32(%1),r5     \n\t"
	       "l.sw     36(%1),r6     \n\t"
	       "l.sw     40(%1),r7     \n\t"
	       "l.sw     44(%1),r8     \n\t"
	       "l.sw     48(%1),r9     \n\t"
	       "l.sw     52(%1),r10    \n\t"
	       "l.sw     56(%1),r11    \n\t"
	       "l.sw     60(%1),r12    \n\t"
	       "l.sw     64(%1),r13    \n\t"
	       "l.sw     68(%1),r14    \n\t"
	       "l.sw     72(%1),r15    \n\t"
	       "l.sw     76(%1),r16    \n\t"
	       "l.sw     80(%1),r17    \n\t"
	       "l.sw     84(%1),r18    \n\t"
	       "l.sw     88(%1),r19    \n\t"
	       "l.sw     92(%1),r20    \n\t"
	       "l.sw     96(%1),r21    \n\t"
	       "l.sw     100(%1),r22   \n\t"
	       "l.sw     104(%1),r23   \n\t"
	       "l.sw     108(%1),r24   \n\t"
	       "l.sw     112(%1),r25   \n\t"
	       "l.sw     116(%1),r26   \n\t"
	       "l.sw     120(%1),r27   \n\t"
	       "l.sw     124(%1),r28   \n\t"
	       "l.sw     128(%1),r29   \n\t"
	       "l.sw     132(%1),r30   \n\t"
	       "l.jr     %0            \n\t"  /* Go there */
	       "l.sw     136(%1),r31   \n"    /* Store the last reg */

	       /**** 64 bit implementation ****/
	       "_L_64bit:              \n\t"
	       "l.ori    %0,%0,lo(_L_restore)\n\t"
	       "l.sw     264(%1),%0    \n\t"
	       "l.sd     16(%1),r1     \n\t"
	       "l.sd     24(%1),r2     \n\t"
	       "l.sd     32(%1),r3     \n\t"
	       "l.sd     40(%1),r4     \n\t"
	       "l.sd     48(%1),r5     \n\t"
	       "l.sd     56(%1),r6     \n\t"
	       "l.sd     64(%1),r7     \n\t"
	       "l.sd     72(%1),r8     \n\t"
	       "l.sd     80(%1),r9     \n\t"
	       "l.sd     88(%1),r10    \n\t"
	       "l.sd     96(%1),r11    \n\t"
	       "l.sd     104(%1),r12   \n\t"
	       "l.sd     112(%1),r13   \n\t"
	       "l.sd     120(%1),r14   \n\t"
	       "l.sd     128(%1),r15   \n\t"
	       "l.sd     136(%1),r16   \n\t"
	       "l.sd     144(%1),r17   \n\t"
	       "l.sd     152(%1),r18   \n\t"
	       "l.sd     160(%1),r19   \n\t"
	       "l.sd     168(%1),r20   \n\t"
	       "l.sd     176(%1),r21   \n\t"
	       "l.sd     184(%1),r22   \n\t"
	       "l.sd     192(%1),r23   \n\t"
	       "l.sd     200(%1),r24   \n\t"
	       "l.sd     208(%1),r25   \n\t"
	       "l.sd     216(%1),r26   \n\t"
	       "l.sd     224(%1),r27   \n\t"
	       "l.sd     232(%1),r28   \n\t"
	       "l.sd     240(%1),r29   \n\t"
	       "l.sd     248(%1),r30   \n\t"
	       "l.jr     %0            \n\t"  /* Go to the new PC */
	       "l.sd     256(%1),r31   \n"    /* Store the last reg */

	       /**** The restoration routine. ****/

	       /* Note that when we return from this function,
		  we will actually be returning to a different
		  context than when we left. The debugger might
		  have conniptions over this, but we'll have to
		  reengineer that later. The stack and status
		  registers will all be changed, however we
		  will not touch the global interrupt mask. */

	       /* Also note, when doing any restore, the most
		  important registers are r1, r2, and r9. These
		  will be accessed immediately upon exiting the
		  routine, and so we want to make sure we load
		  them as early as possible in case they are
		  not in cache */

	       "_L_restore:            \n\t"  /* Restore "heir" */
               "l.mfspr  %2,r0,0x11    \n\t"  /* Status Register */
	       "l.movhi  %0,0x07FF     \n\t"  /* ~SR mask  */
	       "l.ori    %0,%0,0xD1FF  \n\t"
	       "l.and    %2,%0,%2      \n\t"  /* save the global bits */
	       "l.movhi  %0,0xF800     \n\t"  /* SR mask  */
	       "l.ori    %0,%0,0x2E00  \n\t"
	       "l.lwz    %1,0(%3)      \n\t"  /* Get the previous SR */
	       "l.and    %0,%1,%0      \n\t"  /* Mask out the global bits */
	       "l.or     %2,%2,%0      \n\t"  /* Combine local/global */
	       "l.mtspr  r0,%2,0x11    \n\t"  /* Restore the status register */

               "l.mfspr  %0,r0,0x02    \n\t"  /* CPUCFGR */
	       "l.andi   %0,%0,0x40    \n\t"  /* OB64S */
	       "l.sfnei  %0,0x0        \n\t"  /* Save the 64 bit flag */

	       "l.srli   %2,%2,28      \n\t"  /* Move CID into low byte */
	       "l.lwz    %0,4(%3)      \n\t"
	       "l.mtspr  %2,%0,0x20    \n\t"  /* Offset from EPCR */
	       "l.lwz    %0,8(%3)      \n\t"
	       "l.mtspr  %2,%0,0x30    \n\t"  /* Offset from EEAR */
	       "l.lwz    %0,12(%3)     \n\t"

	       "l.bf     _L_r64bit     \n\t"  /* 64 bit architecture */
	       "l.mtspr  %2,%0,0x40    \n\t"  /* Offset from ESR (DELAY) */

	       /**** 32 bit restore ****/
	       "l.lwz   r1,16(%3)      \n\t"
	       "l.lwz   r2,20(%3)      \n\t"
	       "l.lwz   r9,48(%3)      \n\t"
	       "l.lwz   r3,24(%3)      \n\t"
	       "l.lwz   r4,28(%3)      \n\t"
	       "l.lwz   r5,32(%3)      \n\t"
	       "l.lwz   r6,36(%3)      \n\t"
	       "l.lwz   r7,40(%3)      \n\t"
	       "l.lwz   r8,44(%3)      \n\t"
	       "l.lwz   r10,52(%3)     \n\t"
	       "l.lwz   r11,56(%3)     \n\t"
	       "l.lwz   r12,60(%3)     \n\t"
	       "l.lwz   r13,64(%3)     \n\t"
	       "l.lwz   r14,68(%3)     \n\t"
	       "l.lwz   r15,72(%3)     \n\t"
	       "l.lwz   r16,76(%3)     \n\t"
	       "l.lwz   r17,80(%3)     \n\t"
	       "l.lwz   r18,84(%3)     \n\t"
	       "l.lwz   r19,88(%3)     \n\t"
	       "l.lwz   r20,92(%3)     \n\t"
	       "l.lwz   r21,96(%3)     \n\t"
	       "l.lwz   r22,100(%3)    \n\t"
	       "l.lwz   r23,104(%3)    \n\t"
	       "l.lwz   r24,108(%3)    \n\t"
	       "l.lwz   r25,112(%3)    \n\t"
	       "l.lwz   r26,116(%3)    \n\t"
	       "l.lwz   r27,120(%3)    \n\t"
	       "l.lwz   r28,124(%3)    \n\t"
	       "l.lwz   r29,128(%3)    \n\t"
	       "l.lwz   r30,132(%3)    \n\t"
	       "l.j     _L_return      \n\t"
	       "l.lwz   r31,136(%3)    \n"

	       /****  64 bit restore ****/
	       "_L_r64bit:             \n\t"
	       "l.ld    r1,16(%3)      \n\t"
	       "l.ld    r2,24(%3)      \n\t"
	       "l.ld   r9,80(%3)       \n\t"
	       "l.ld   r3,32(%3)       \n\t"
	       "l.ld   r4,40(%3)       \n\t"
	       "l.ld   r5,48(%3)       \n\t"
	       "l.ld   r6,56(%3)       \n\t"
	       "l.ld   r7,64(%3)       \n\t"
	       "l.ld   r8,72(%3)       \n\t"
	       "l.ld   r10,88(%3)      \n\t"
	       "l.ld   r11,96(%3)      \n\t"
	       "l.ld   r12,104(%3)     \n\t"
	       "l.ld   r13,112(%3)     \n\t"
	       "l.ld   r14,120(%3)     \n\t"
	       "l.ld   r15,128(%3)     \n\t"
	       "l.ld   r16,136(%3)     \n\t"
	       "l.ld   r17,144(%3)     \n\t"
	       "l.ld   r18,152(%3)     \n\t"
	       "l.ld   r19,160(%3)     \n\t"
	       "l.ld   r20,168(%3)     \n\t"
	       "l.ld   r21,176(%3)     \n\t"
	       "l.ld   r22,184(%3)     \n\t"
	       "l.ld   r23,192(%3)     \n\t"
	       "l.ld   r24,200(%3)     \n\t"
	       "l.ld   r25,208(%3)     \n\t"
	       "l.ld   r26,216(%3)     \n\t"
	       "l.ld   r27,224(%3)     \n\t"
	       "l.ld   r28,232(%3)     \n\t"
	       "l.ld   r29,240(%3)     \n\t"
	       "l.ld   r30,248(%3)     \n\t"
	       "l.ld   r31,256(%3)     \n"

	       "_L_return:             \n\t"  /* End of routine */

		: "=&r" (temp1), "+r" (run), "=&r" (temp2)
		: "r" (heir));

  /* Note that some registers were used for parameter passing and
     temporary registers (temp1 and temp2). These values were
     saved and restored across context calls, but the values that
     the caller needs should have been stored on the stack. The
     C code should now restore these from the stack, since r1 and
     r2 have been restored, and return to the location specified
     by r9. Then, all should be happy in the world. */
}

/*
 *  _CPU_Context_restore
 *
 *  This routine is generally used only to restart self in an
 *  efficient manner.  It may simply be a label in _CPU_Context_switch.
 *
 *  NOTE: May be unnecessary to reload some registers.
 *
 *  Or1k Specific Information:
 *
 *  In our implementation, this simply redirects to switch context
 */

void _CPU_Context_restore(
  Context_Control  *run
)
{
  /* A self-restore is expressed as a context switch with no heir.
     NOTE(review): _CPU_Context_switch treats a NULL heir as the
     "self restore" case but then restores *from* the heir pointer;
     verify that path before depending on this routine. */
  Context_Control *no_heir = NULL;

  _CPU_Context_switch( run, no_heir );
}


/*  void __ISR_Handler()
 *
 *  This routine provides the RTEMS interrupt management.
 *
 *  Or1k Specific Information:
 *
 *  Based on the Or1k interrupt architecture described in chapter 16
 *  and the exception architecture described in chapter 9
 */

void _ISR_Handler(uint32_t   vector,uint32_t   ProgramCounter,
		  uint32_t   EffectiveAddress,uint32_t   StatusRegister)
{
  /* Intentionally empty: on the Or1k, exception dispatch happens in the
     BSP startup code, so this generic handler is never invoked (see the
     note at the end of this function).  The commentary below is the
     template description of what a "common" _ISR_Handler would do. */

   /*
    *  This discussion ignores a lot of the ugly details in a real
    *  implementation such as saving enough registers/state to be
    *  able to do something real.  Keep in mind that the goal is
    *  to invoke a user's ISR handler which is written in C and
    *  uses a certain set of registers.
    *
    *  Also note that the exact order is to a large extent flexible.
    *  Hardware will dictate a sequence for a certain subset of
    *  _ISR_Handler while requirements for setting
    */

  /*
   *  At entry to "common" _ISR_Handler, the vector number must be
   *  available.  On some CPUs the hardware puts either the vector
   *  number or the offset into the vector table for this ISR in a
   *  known place.  If the hardware does not give us this information,
   *  then the assembly portion of RTEMS for this port will contain
   *  a set of distinct interrupt entry points which somehow place
   *  the vector number in a known place (which is safe if another
   *  interrupt nests this one) and branches to _ISR_Handler.
   *
   *  save some or all context on stack
   *  may need to save some special interrupt information for exit
   *
   *  #if ( CPU_HAS_SOFTWARE_INTERRUPT_STACK == TRUE )
   *    if ( _ISR_Nest_level == 0 )
   *      switch to software interrupt stack
   *  #endif
   *
   *  _ISR_Nest_level++;
   *
   *  _Thread_Dispatch_disable_level++;
   *
   *  (*_ISR_Vector_table[ vector ])( vector );
   *
   *  --_ISR_Nest_level;
   *
   *  if ( _ISR_Nest_level )
   *    goto the label "exit interrupt (simple case)"
   *
   *  #if ( CPU_HAS_SOFTWARE_INTERRUPT_STACK == TRUE )
   *    restore stack
   *  #endif
   *
   *  if ( !_Context_Switch_necessary )
   *    goto the label "exit interrupt (simple case)"
   *
   *  if ( !_ISR_Signals_to_thread_executing )
   *    _ISR_Signals_to_thread_executing = FALSE;
   *    goto the label "exit interrupt (simple case)"
   *
   *  call _Thread_Dispatch() or prepare to return to _ISR_Dispatch
   *
   *  prepare to get out of interrupt
   *  return from interrupt  (maybe to _ISR_Dispatch)
   *
   *  LABEL "exit interrupt (simple case)":
   *  prepare to get out of interrupt
   *  return from interrupt
   */

  /* In the Or1k architecture, exceptions are handled in the
     startup code of the board support package. Thus, this
     routine is never called. Or1k exception routines are called
     with the following prototype:

     function(int vector#, int PC, int Address, int StatusRegister);

     These parameters are snapshots of the system when the exception
     was encountered. If virtual memory is active, things like the
     PC and Address may have little meaning, as they are referenced
     in physical space, not the virtual space of the process.
  */
}