diff options
Diffstat (limited to '')
-rw-r--r-- | c/src/lib/libbsp/powerpc/mcp750/bootloader/head.S | 379 |
1 files changed, 379 insertions, 0 deletions
/* Extracted from c/src/lib/libbsp/powerpc/mcp750/bootloader/head.S */
/*
 *  $Id$
 *
 *  This code is loaded by the ROM loader at some arbitrary location.
 *  Move it to high memory so that it can load the kernel at 0x0000.
 *
 *  Syntax: GNU as for PowerPC, run through the C preprocessor.
 *
 *  Register roles that stay live across most of the start-up path
 *  (all taken from the code below):
 *    r28  MSR value found on entry (saved to o_msr)
 *    r29  HID0 value found on entry (saved to o_hid0)
 *    r30  GOT pointer; reloc expects it to point to got2+0x8000
 *         (presumably set up by the GET_GOT macro - confirm in asm.h)
 *    bd   pointer to the boot data block (__bd); register alias is
 *         presumably defined in bootldr.h - confirm
 *    cr2  cr2.eq set means the processor is a 601
 */

#include "bootldr.h"
#include <libcpu/cpu.h>
#include <rtems/score/targopts.h>
#include "asm.h"

#undef TEST_PPCBUG_CALLS
#define FRAME_SIZE 32
#define LOCK_CACHES (HID0_DLOCK|HID0_ILOCK)
#define INVL_CACHES (HID0_DCI|HID0_ICFI)
#define ENBL_CACHES (HID0_DCE|HID0_ICE)

#define USE_PPCBUG
#undef USE_PPCBUG

        START_GOT
        GOT_ENTRY(_GOT2_TABLE_)
        GOT_ENTRY(_FIXUP_TABLE_)
        GOT_ENTRY(.bss)
        GOT_ENTRY(codemove)
        GOT_ENTRY(0)
        GOT_ENTRY(__bd)
        GOT_ENTRY(moved)
        GOT_ENTRY(_binary_rtems_gz_start)
        GOT_ENTRY(_binary_initrd_gz_start)
        GOT_ENTRY(_binary_initrd_gz_end)
#ifdef TEST_PPCBUG_CALLS
        GOT_ENTRY(banner_start)
        GOT_ENTRY(banner_end)
#endif
        END_GOT
        .globl  start
        .type   start,@function
/* Point the stack into the PreP partition header in the x86 reserved
 * code area, so that simple C routines can be called.
 */
start:  bl      1f
1:      mflr    r1
        li      r0,0
        stwu    r0,start-1b-0x400+0x1b0-FRAME_SIZE(r1)
        stmw    r26,FRAME_SIZE-24(r1)
        GET_GOT
        mfmsr   r28             /* Turn off interrupts */
        ori     r0,r28,MSR_EE   /* set then toggle MSR_EE: clears the bit */
        xori    r0,r0,MSR_EE
        mtmsr   r0

/* Enable the caches, from now on cr2.eq set means processor is 601 */
        mfpvr   r0
        mfspr   r29,HID0
        srwi    r0,r0,16        /* keep the upper half of the PVR */
        cmplwi  cr2,r0,1
        beq     2,2f            /* 601: leave HID0 alone */
#ifndef USE_PPCBUG
        /* Set enable+invalidate+lock bits, then toggle invalidate and
         * lock off again, so only the enable bits end up forced to 1.
         */
        ori     r0,r29,ENBL_CACHES|INVL_CACHES|LOCK_CACHES
        xori    r0,r0,INVL_CACHES|LOCK_CACHES
        sync
        isync
        mtspr   HID0,r0
#endif
2:      bl      reloc

/* save all the parameters and the original msr/hid0/r31 */
        lwz     bd,GOT(__bd)
        stw     r3,0(bd)
        stw     r4,4(bd)
        stw     r5,8(bd)
        stw     r6,12(bd)
        lis     r3,__size@sectoff@ha    /* r3 = __size, built in two halves */
        stw     r7,16(bd)
        stw     r8,20(bd)
        addi    r3,r3,__size@sectoff@l
        stw     r9,24(bd)
        stw     r10,28(bd)
        stw     r28,o_msr(bd)
        stw     r29,o_hid0(bd)
        stw     r31,o_r31(bd)

/* Call the routine to fill boot_data structure from residual data.
 * And to find where the code has to be moved.
 */
        bl      early_setup

/* Now we need to relocate ourselves, where we are told to. First put a
 * copy of the codemove routine to some place in memory.
 * (which may be where the 0x41 partition was loaded, so size is critical).
 */
        lwz     r4,GOT(codemove)
        li      r5,_size_codemove
        lwz     r3,mover(bd)
        lwz     r6,cache_lsize(bd)
        bl      codemove
        mtctr   r3              # Where the temporary codemove is.
        lwz     r3,image(bd)
        lis     r5,_edata@sectoff@ha
        lwz     r4,GOT(0)       # Our own address
        addi    r5,r5,_edata@sectoff@l
        lwz     r6,cache_lsize(bd)
        lwz     r8,GOT(moved)
        sub     r7,r3,r4        # Difference to adjust pointers.
        add     r8,r8,r7
        add     r30,r30,r7
        add     bd,bd,r7
/* Call the copy routine but return to the new area. */
        mtlr    r8              # for the return address
        bctr                    # returns to the moved instruction
/* Establish the new top stack frame.
 */
moved:  lwz     r1,stack(bd)
        li      r0,0
        stwu    r0,-16(r1)

/* relocate again */
        bl      reloc
/* Clear all of BSS */
        lwz     r10,GOT(.bss)
        li      r0,__bss_words@sectoff@l
        subi    r10,r10,4       /* pre-decrement for the stwu below */
        cmpwi   r0,0
        mtctr   r0
        li      r0,0
        beq     4f              /* nothing to clear */
3:      stwu    r0,4(r10)
        bdnz    3b

/* Final memory initialization. First switch to unmapped mode
 * in case the FW had set the MMU on, and flush the TLB to avoid
 * stale entries from interfering. No I/O access is allowed
 * during this time!
 */
/* The label is deliberately kept outside the conditional: were it inside,
 * building with USE_PPCBUG defined would make the "beq 4f" above bind to
 * the next "4:" in the file (inside codemove) without any diagnostic.
 */
4:
#ifndef USE_PPCBUG
        bl      MMUoff
#endif
        bl      flush_tlb
/* Some firmware versions leave stale values in the BATs, it's time
 * to invalidate them to avoid interferences with our own mappings.
 * But the 601 valid bit is in the BATL (IBAT only) and others are in
 * the [ID]BATU. Bloat, bloat.. fortunately thrown away later.
 */
        li      r3,0
        beq     cr2,5f          /* 601: skip the DBAT writes */
        mtdbatu 0,r3
        mtdbatu 1,r3
        mtdbatu 2,r3
        mtdbatu 3,r3
5:      mtibatu 0,r3
        mtibatl 0,r3
        mtibatu 1,r3
        mtibatl 1,r3
        mtibatu 2,r3
        mtibatl 2,r3
        mtibatu 3,r3
        mtibatl 3,r3
        lis     r3,__size@sectoff@ha
        addi    r3,r3,__size@sectoff@l
        sync                    # We are going to touch SDR1 !
        bl      mm_init
        bl      MMUon

/* Now we are mapped and can perform I/O if we want */
#ifdef TEST_PPCBUG_CALLS
/* Experience seems to show that PPCBug can only be called with the
 * data cache disabled and with MMU disabled. Bummer.
 */
        li      r10,0x22        # .OUTLN
        lwz     r3,GOT(banner_start)
        lwz     r4,GOT(banner_end)
        sc
#endif
        bl      setup_hw
        lwz     r4,GOT(_binary_rtems_gz_start)
        lis     r5,_rtems_gz_size@sectoff@ha
        lwz     r6,GOT(_binary_initrd_gz_start)
        lis     r3,_rtems_size@sectoff@ha
        lwz     r7,GOT(_binary_initrd_gz_end)
        addi    r5,r5,_rtems_gz_size@sectoff@l
        addi    r3,r3,_rtems_size@sectoff@l
        sub     r7,r7,r6        /* r7 = initrd end - initrd start */
        bl      decompress_kernel

/* Back here we are unmapped and we start the kernel, passing up to eight
 * parameters just in case, only r3 to r7 used for now. Flush the tlb so
 * that the loaded image starts in a clean state.
 */
        bl      flush_tlb
        lwz     r3,0(bd)
        lwz     r4,4(bd)
        lwz     r5,8(bd)
        lwz     r6,12(bd)
        lwz     r7,16(bd)
        lwz     r8,20(bd)
        lwz     r9,24(bd)
        lwz     r10,28(bd)

        lwz     r30,0(0)        /* word stored at address 0 is the jump target */
        mtctr   r30
/*
 * Linux code again
        lis     r30,0xdeadc0de@ha
        addi    r30,r30,0xdeadc0de@l
        stw     r30,0(0)
        li      r30,0
*/
        dcbst   0,r30           /* Make sure it's in memory ! */
/* We just flash invalidate and disable the dcache, unless it's a 601,
 * critical areas have been flushed and we don't care about the stack
 * and other scratch areas.
 */
        beq     cr2,1f
        mfspr   r0,HID0
        ori     r0,r0,HID0_DCI|HID0_DCE
        sync
        mtspr   HID0,r0         /* first write: DCI and DCE both set */
        xori    r0,r0,HID0_DCI|HID0_DCE
        mtspr   HID0,r0         /* second write: DCI and DCE both clear */
/* Provisional return to FW, works for PPCBug */
#if 0
1:      mfmsr   r10
        ori     r10,r10,MSR_IP
        mtmsr   r10
        li      r10,0x63
        sc
#else
1:      bctr
#endif



/* relocation function, r30 must point to got2+0x8000
 *
 * In:       r30 as above.
 * Out:      nothing; every got2 entry and every fixup target has had the
 *           load offset added, and the fixup table entries themselves are
 *           rewritten to the relocated addresses.
 * Clobbers: r0, r10, r11, r12, ctr, cr0.
 */
reloc:
/* Adjust got2 pointers, no need to check for 0, this code already puts
 * a few entries in the table.
 */
        li      r0,__got2_entries@sectoff@l
        la      r12,GOT(_GOT2_TABLE_)   /* address of the slot itself      */
        lwz     r11,GOT(_GOT2_TABLE_)   /* value currently stored in it    */
        mtctr   r0
        sub     r11,r12,r11             /* r11 = offset added to each entry */
        addi    r12,r12,-4
1:      lwzu    r0,4(r12)
        add     r0,r0,r11
        stw     r0,0(r12)
        bdnz    1b

/* Now adjust the fixups and the pointers to the fixups in case we need
 * to move ourselves again.
 */
2:      li      r0,__fixup_entries@sectoff@l
        lwz     r12,GOT(_FIXUP_TABLE_)
        cmpwi   r0,0
        mtctr   r0
        addi    r12,r12,-4
        beqlr                           /* no fixups: done */
3:      lwzu    r10,4(r12)
        lwzux   r0,r10,r11              /* r10 += r11, r0 = word at new r10 */
        add     r0,r0,r11
        stw     r10,0(r12)
        stw     r0,0(r10)
        bdnz    3b
        blr

/* Set the MMU on and off: code is always mapped 1:1 and does not need MMU,
 * but it does not cost so much to map it also and it catches calls through
 * NULL function pointers.
 *
 * Both routines return to the caller through rfi, with SRR0 loaded from LR
 * and SRR1 holding the new MSR.
 * Clobbers: r0, r11, srr0, srr1.
 */
        .globl  MMUon
        .type   MMUon,@function
MMUon:  mfmsr   r0
        ori     r0,r0,MSR_IR|MSR_DR|MSR_IP
        mflr    r11
        xori    r0,r0,MSR_IP            /* IR and DR set, IP cleared */
        mtsrr0  r11
        mtsrr1  r0
        rfi
        .globl  MMUoff
        .type   MMUoff,@function
MMUoff: mfmsr   r0
        ori     r0,r0,MSR_IR|MSR_DR|MSR_IP
        mflr    r11
        xori    r0,r0,MSR_IR|MSR_DR     /* IR and DR cleared, IP set */
        mtsrr0  r11
        mtsrr1  r0
        rfi

/* Due to the PPC architecture (and according to the specifications), a
 * series of tlbie which goes through a whole 256 MB segment always flushes
 * the whole TLB. This is obviously overkill and slow, but who cares ?
 * It takes about 1 ms on a 200 MHz 603e and works even if residual data
 * get the number of TLB entries wrong.
 *
 * Clobbers: r11, cr0 (and the carry bit, through addic.).
 */
flush_tlb:
        lis     r11,0x1000
1:      addic.  r11,r11,-0x1000         /* step down in 0x1000 byte units */
        tlbie   r11
        bnl     1b                      /* loop until r11 goes negative */
/* tlbsync is not implemented on 601, so use sync which seems to be a superset
 * of tlbsync in all cases and do not bother with CPU dependent code
 */
        sync
        blr
/* A few utility functions, some copied from arch/ppc/lib/string.S */

#if 0
        .globl  strnlen
        .type   strnlen,@function
strnlen:
        addi    r4,r4,1
        mtctr   r4
        addi    r4,r3,-1
1:      lbzu    r0,1(r4)
        cmpwi   0,r0,0
        bdnzf   eq,1b
        subf    r3,r3,r4
        blr
#endif
        .globl  codemove
codemove:
        .type   codemove,@function
/* r3 dest, r4 src, r5 length in bytes, r6 cachelinesize
 *
 * Copies whole words (the byte length is rounded up to a multiple of 4),
 * ascending when dest < src and descending when dest > src, then writes
 * back the data cache and invalidates the instruction cache over the
 * destination range.
 *
 * Out:      r3 = dest rounded down to a cache line boundary when r6 is
 *           non-zero, otherwise dest unchanged.
 * Clobbers: r0, r4, r5, r7, r8, ctr, cr0, cr1.
 */
        cmplw   cr1,r3,r4
        addi    r0,r5,3
        srwi.   r0,r0,2                 /* r0 = word count, sets cr0 */
        beq     cr1,4f                  /* In place copy is not necessary */
        beq     7f                      /* Protect against 0 count */
        mtctr   r0
        bge     cr1,2f                  /* dest above src: copy backwards */

        la      r8,-4(r4)
        la      r7,-4(r3)
1:      lwzu    r0,4(r8)
        stwu    r0,4(r7)
        bdnz    1b
        b       4f

2:      slwi    r0,r0,2                 /* back to a byte count */
        add     r8,r4,r0
        add     r7,r3,r0
3:      lwzu    r0,-4(r8)
        stwu    r0,-4(r7)
        bdnz    3b

/* Now flush the cache: note that we must start from a cache aligned
 * address. Otherwise we might miss one cache line.
 */
4:      cmpwi   r6,0
        add     r5,r3,r5                /* r5 = end of destination */
        beq     7f                      /* Always flush prefetch queue in any case */
        subi    r0,r6,1
        andc    r3,r3,r0                /* align start down to a cache line */
        mr      r4,r3
5:      cmplw   r4,r5
        dcbst   0,r4
        add     r4,r4,r6
        blt     5b
        sync                            /* Wait for all dcbst to complete on bus */
        mr      r4,r3
6:      cmplw   r4,r5
        icbi    0,r4
        add     r4,r4,r6
        blt     6b
7:      sync                            /* Wait for all icbi to complete on bus */
        isync
        blr
        .size   codemove,.-codemove
_size_codemove=.-codemove

        .section ".data"                # .rodata
        .align  2
#ifdef TEST_PPCBUG_CALLS
banner_start:
        .ascii  "This message was printed by PPCBug with MMU enabled"
banner_end:
#endif