diff options
author | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-03-26 06:18:51 +0200 |
---|---|---|
committer | Sebastian Huber <sebastian.huber@embedded-brains.de> | 2018-03-26 10:40:48 +0200 |
commit | 11fe8c59c6c145bca52e977363183add0fbe1b59 (patch) | |
tree | aa9810f50d832df7cd5af8befd9acc05e430fb14 /bsps | |
parent | bsp/tqm8xx: Move libcpu content to bsps (diff) | |
download | rtems-11fe8c59c6c145bca52e977363183add0fbe1b59.tar.bz2 |
bsps/powerpc: Move MMU support to bsps
This patch is a part of the BSP source reorganization.
Update #3285.
Diffstat (limited to 'bsps')
-rw-r--r-- | bsps/powerpc/shared/mmu/bat.c | 545 | ||||
-rw-r--r-- | bsps/powerpc/shared/mmu/e500-mmu.c | 631 | ||||
-rw-r--r-- | bsps/powerpc/shared/mmu/mmuAsm.S | 530 | ||||
-rw-r--r-- | bsps/powerpc/shared/mmu/pte121.c | 1139 |
4 files changed, 2845 insertions, 0 deletions
diff --git a/bsps/powerpc/shared/mmu/bat.c b/bsps/powerpc/shared/mmu/bat.c new file mode 100644 index 0000000000..7a9487b9ff --- /dev/null +++ b/bsps/powerpc/shared/mmu/bat.c @@ -0,0 +1,545 @@ +/* + * bat.c + * + * This file contains the implementation of C function to + * Instantiate 60x/7xx ppc Block Address Translation (BAT) registers. + * More detailed information can be found on Motorola + * site and more precisely in the following book : + * + * MPC750 + * RISC Microprocessor User's Manual + * Motorola REF : MPC750UM/AD 8/97 + * + * Copyright (C) 1999 Eric Valette (valette@crf.canon.fr) + * Canon Centre Recherche France. + * + * The license and distribution terms for this file may be + * found in the file LICENSE in this distribution or at + * http://www.rtems.org/license/LICENSE. + */ +#include <rtems.h> +#include <libcpu/bat.h> +#include <libcpu/spr.h> +#include <rtems/bspIo.h> + +#include <libcpu/cpuIdent.h> + +#define TYP_I 1 +#define TYP_D 0 + +typedef union +{ /* BAT register values to be loaded */ + BAT bat; + struct + { + unsigned int u, l; + } words; +} ubat; + +typedef struct batrange +{ /* stores address ranges mapped by BATs */ + unsigned long start; + unsigned long limit; + unsigned long phys; +} batrange; + +batrange bat_addrs[2][8] = { { {0,} } }; + +/* could encode this in bat_addrs but I don't touch that one for bwds compat. 
reasons */ +/* bitmask of used bats */ +static unsigned bat_in_use[2] = { 0, 0 }; + +/* define a few macros */ + +#define CLRBAT_ASM(batu,r) \ + " sync \n" \ + " isync \n" \ + " li "#r ", 0 \n" \ + " mtspr "#batu ", "#r "\n" \ + " sync \n" \ + " isync \n" + +#define SETBAT_ASM(batu, batl, u, l)\ + " mtspr "#batl ", "#l " \n" \ + " sync \n" \ + " isync \n" \ + " mtspr "#batu ", "#u " \n" \ + " sync \n" \ + " isync \n" + +#define CLRBAT(bat) \ + asm volatile( \ + CLRBAT_ASM(%0, 0) \ + : \ + :"i"(bat##U) \ + :"0") + +#define GETBAT(bat,u,l) \ + asm volatile( \ + " mfspr %0, %2 \n" \ + " mfspr %1, %3 \n" \ + :"=r"(u),"=r"(l) \ + :"i"(bat##U),"i"(bat##L) \ + ) + +#define DECL_SETBAT(lcbat,bat) \ +void \ +asm_set##lcbat(unsigned int upper, unsigned int lower) \ +{ \ +asm volatile( \ + CLRBAT_ASM(%0,0) \ + SETBAT_ASM(%0,%1,%2,%3) \ + : \ + :"i"(bat##U), \ + "i"(bat##L), \ + "r"(upper),"r"(lower) \ + :"0"); \ +} + +/* export the 'asm' versions for historic reasons */ +DECL_SETBAT (dbat0, DBAT0) +DECL_SETBAT (dbat1, DBAT1) +DECL_SETBAT (dbat2, DBAT2) +DECL_SETBAT (dbat3, DBAT3) + +static DECL_SETBAT (dbat4, DBAT4) +static DECL_SETBAT (dbat5, DBAT5) +static DECL_SETBAT (dbat6, DBAT6) +static DECL_SETBAT (dbat7, DBAT7) + +static DECL_SETBAT (ibat0, IBAT0) +static DECL_SETBAT (ibat1, IBAT1) +static DECL_SETBAT (ibat2, IBAT2) +static DECL_SETBAT (ibat3, IBAT3) +static DECL_SETBAT (ibat4, IBAT4) +static DECL_SETBAT (ibat5, IBAT5) +static DECL_SETBAT (ibat6, IBAT6) +static DECL_SETBAT (ibat7, IBAT7) + + +SPR_RO (HID0); + +static void +set_hid0_sync (unsigned long val) +{ + __asm__ volatile ( + " sync \n" + " isync \n" + " mtspr %0, %1 \n" + " sync \n" + " isync \n" + : + :"i" (HID0), "r" (val) + :"memory" /* paranoia */ + ); +} + +static void +bat_addrs_put (ubat * bat, int typ, int idx) +{ + unsigned long bl; + if (bat->bat.batu.vp || bat->bat.batu.vs) { + bat_addrs[typ][idx].start = bat->bat.batu.bepi << 17; + bat_addrs[typ][idx].phys = bat->bat.batl.brpn << 17; + + /* extended 
BL cannot be extracted using BAT union + * - let's just hope the upper bits read 0 on pre 745x + * CPUs. + */ + bl = (bat->words.u << 15) | ((1 << 17) - 1); + bat_addrs[typ][idx].limit = bat_addrs[typ][idx].start + bl; + + bat_in_use[typ] |= (1 << idx); + } +} + +/* We don't know how the board was initialized. Therefore, + * when 'setdbat' is first used we must initialize our + * cache. + */ +static void +bat_addrs_init (void) +{ + ubat bat; + + GETBAT (DBAT0, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 0); + GETBAT (DBAT1, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 1); + GETBAT (DBAT2, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 2); + GETBAT (DBAT3, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 3); + + GETBAT (IBAT0, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 0); + GETBAT (IBAT1, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 1); + GETBAT (IBAT2, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 2); + GETBAT (IBAT3, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 3); + + + if ( ppc_cpu_has_8_bats() && (HID0_7455_HIGH_BAT_EN & _read_HID0 ())) { + GETBAT (DBAT4, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 4); + GETBAT (DBAT5, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 5); + GETBAT (DBAT6, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 6); + GETBAT (DBAT7, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_D, 7); + GETBAT (IBAT4, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 4); + GETBAT (IBAT5, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 5); + GETBAT (IBAT6, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 6); + GETBAT (IBAT7, bat.words.u, bat.words.l); + bat_addrs_put (&bat, TYP_I, 7); + } +} + +static void +do_dssall (void) +{ + /* Before changing BATs, 'dssall' must be issued. 
+ * We check MSR for MSR_VE and issue a 'dssall' if + * MSR_VE is set hoping that + * a) on non-altivec CPUs MSR_VE reads as zero + * b) all altivec CPUs use the same bit + * NOTE: psim doesn't implement dssall so we skip if we run on psim + */ + if ( (_read_MSR () & MSR_VE) && PPC_PSIM != get_ppc_cpu_type() ) { + /* this construct is needed because we don't know + * if this file is compiled with -maltivec. + * (I plan to add altivec support outside of + * RTEMS core and hence I'd rather not + * rely on consistent compiler flags). + */ +#define DSSALL 0x7e00066c /* dssall opcode */ + __asm__ volatile (" .long %0"::"i" (DSSALL)); +#undef DSSALL + } +} + +/* Clear I/D bats 4..7 ONLY ON 7455 etc. */ +static void +clear_hi_bats (void) +{ + do_dssall (); + CLRBAT (DBAT4); + CLRBAT (DBAT5); + CLRBAT (DBAT6); + CLRBAT (DBAT7); + CLRBAT (IBAT4); + CLRBAT (IBAT5); + CLRBAT (IBAT6); + CLRBAT (IBAT7); +} + +static int +check_bat_index (int i) +{ + unsigned long hid0; + + if (i >= 0 && i < 4) + return 0; + if (i >= 4 && i < 8) { + if ( ! 
ppc_cpu_has_8_bats() ) + return -1; + /* OK, we're on the right hardware; + * check if we are already enabled + */ + hid0 = _read_HID0 (); + if (HID0_7455_HIGH_BAT_EN & hid0) + return 0; + /* No; enable now */ + clear_hi_bats (); + set_hid0_sync (hid0 | HID0_7455_HIGH_BAT_EN); + return 0; + } + return -1; +} + +/* size argument check: + * - must be a power of two or zero + * - must be <= 1<<28 ( non 745x cpu ) + * - can be 1<<29..1<31 or 0xffffffff on 745x + * - size < 1<<17 means 0 + * computes and returns the block mask + * RETURNS: + * block mask on success or -1 on error + */ +static int +check_bat_size (unsigned long size) +{ + unsigned long bit; + unsigned long hid0; + + /* First of all, it must be a power of two */ + if (0 == size) + return 0; + + if (0xffffffff == size) { + bit = 32; + } else { + __asm__ volatile (" cntlzw %0, %1":"=r" (bit):"r" (size)); + bit = 31 - bit; + if (1 << bit != size) + return -1; + } + /* bit < 17 is not really legal but we aliased it to 0 in the past */ + if (bit > (11 + 17)) { + if ( ! ppc_cpu_has_8_bats() ) + return -1; + + hid0 = _read_HID0 (); + /* Let's enable the larger block size if necessary */ + if (!(HID0_7455_XBSEN & hid0)) + set_hid0_sync (hid0 | HID0_7455_XBSEN); + } + + return (1 << (bit - 17)) - 1; +} + +static int +check_overlap (int typ, unsigned long start, unsigned long size) +{ + int i; + unsigned long limit = start + size - 1; + for (i = 0; i < sizeof (bat_addrs[typ]) / sizeof (bat_addrs[typ][0]); i++) { + if (!((1 << i) & bat_in_use[typ])) + continue; /* unused bat */ + /* safe is 'limit < bat_addrs[t][i].start || start > bat_addrs[t][i].limit */ + if (limit >= bat_addrs[typ][i].start && start <= bat_addrs[typ][i].limit) + return i; + } + return -1; +} + + +/* Take no risks -- the essential parts of this routine run with + * interrupts disabled! 
+ */ + +static int +setbat (int typ, int bat_index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + unsigned long level; + unsigned int bl; + int err; + int wimgxpp; + ubat bat; + + if (check_bat_index (bat_index)) { + printk ("Invalid BAT index %d\n", bat_index); + return -1; + } + + if ((int) (bl = check_bat_size (size)) < 0) { + printk ("Invalid BAT size %u\n", size); + return -1; + } + + if (virt & (size - 1)) { + printk ("BAT effective address 0x%08lx misaligned (size is 0x%08x)\n", + virt, size); + return -1; + } + + if (phys & (size - 1)) { + printk ("BAT physical address 0x%08lx misaligned (size is 0x%08x)\n", phys, + size); + return -1; + } + + if (virt + size - 1 < virt) { + printk ("BAT range invalid: wraps around zero 0x%08lx..0x%08lx\n", virt, + virt + size - 1); + return -1; + } + + if ( TYP_I == typ && ( ( _PAGE_GUARDED | _PAGE_WRITETHRU ) & flags ) ) { + printk("IBAT must not have 'guarded' or 'writethrough' attribute\n"); + return -1; + } + +/* must protect the bat_addrs table -- since this routine is only used for board setup + * or similar special purposes we don't bother about interrupt latency too much. + */ + rtems_interrupt_disable (level); + + { /* might have to initialize our cached data */ + static char init_done = 0; + if (!init_done) { + bat_addrs_init (); + init_done = 1; + } + } + + err = check_overlap (typ, virt, size); + if ((size >= (1 << 17)) && (err >= 0) && (err != bat_index)) { + rtems_interrupt_enable (level); + printk ("BATs must not overlap; area 0x%08lx..0x%08lx hits %cBAT %i\n", + virt, virt + size, (TYP_I == typ ? 'I' : 'D'), err); + return -1; + } + + /* 603, 604, etc. */ + wimgxpp = flags & (_PAGE_WRITETHRU | _PAGE_NO_CACHE + | _PAGE_COHERENT | _PAGE_GUARDED); + wimgxpp |= (flags & _PAGE_RW) ? 
BPP_RW : BPP_RX; + bat.words.u = virt | (bl << 2) | 2; /* Vs=1, Vp=0 */ + bat.words.l = phys | wimgxpp; + if (flags & _PAGE_USER) + bat.bat.batu.vp = 1; + bat_addrs[typ][bat_index].start = virt; + bat_addrs[typ][bat_index].limit = virt + ((bl + 1) << 17) - 1; + bat_addrs[typ][bat_index].phys = phys; + bat_in_use[typ] |= 1 << bat_index; + if (size < (1 << 17)) { + /* size of 0 tells us to switch it off */ + bat.bat.batu.vp = 0; + bat.bat.batu.vs = 0; + bat_in_use[typ] &= ~(1 << bat_index); + /* mimic old behavior when bl was 0 (bs==0 is actually legal; it doesnt + * indicate a size of zero. We now accept bl==0 and look at the size. + */ + bat_addrs[typ][bat_index].limit = virt; + } + do_dssall (); + if ( TYP_I == typ ) { + switch (bat_index) { + case 0: asm_setibat0 (bat.words.u, bat.words.l); break; + case 1: asm_setibat1 (bat.words.u, bat.words.l); break; + case 2: asm_setibat2 (bat.words.u, bat.words.l); break; + case 3: asm_setibat3 (bat.words.u, bat.words.l); break; + /* cpu check already done in check_index */ + case 4: asm_setibat4 (bat.words.u, bat.words.l); break; + case 5: asm_setibat5 (bat.words.u, bat.words.l); break; + case 6: asm_setibat6 (bat.words.u, bat.words.l); break; + case 7: asm_setibat7 (bat.words.u, bat.words.l); break; + default: /* should never get here anyways */ + break; + } + } else { + switch (bat_index) { + case 0: asm_setdbat0 (bat.words.u, bat.words.l); break; + case 1: asm_setdbat1 (bat.words.u, bat.words.l); break; + case 2: asm_setdbat2 (bat.words.u, bat.words.l); break; + case 3: asm_setdbat3 (bat.words.u, bat.words.l); break; + /* cpu check already done in check_index */ + case 4: asm_setdbat4 (bat.words.u, bat.words.l); break; + case 5: asm_setdbat5 (bat.words.u, bat.words.l); break; + case 6: asm_setdbat6 (bat.words.u, bat.words.l); break; + case 7: asm_setdbat7 (bat.words.u, bat.words.l); break; + default: /* should never get here anyways */ + break; + } + } + rtems_interrupt_enable (level); + + return 0; +} + +static int 
+getbat (int typ, int idx, unsigned long *pu, unsigned long *pl) +{ + unsigned long u, l; + + if (check_bat_index (idx)) { + printk ("Invalid BAT #%i\n", idx); + return -1; + } + if ( TYP_I == typ ) { + switch (idx) { + case 0: GETBAT (IBAT0, u, l); break; + case 1: GETBAT (IBAT1, u, l); break; + case 2: GETBAT (IBAT2, u, l); break; + case 3: GETBAT (IBAT3, u, l); break; + /* cpu check already done in check_index */ + case 4: GETBAT (IBAT4, u, l); break; + case 5: GETBAT (IBAT5, u, l); break; + case 6: GETBAT (IBAT6, u, l); break; + case 7: GETBAT (IBAT7, u, l); break; + default: /* should never get here anyways */ + return -1; + } + } else { + switch (idx) { + case 0: GETBAT (DBAT0, u, l); break; + case 1: GETBAT (DBAT1, u, l); break; + case 2: GETBAT (DBAT2, u, l); break; + case 3: GETBAT (DBAT3, u, l); break; + /* cpu check already done in check_index */ + case 4: GETBAT (DBAT4, u, l); break; + case 5: GETBAT (DBAT5, u, l); break; + case 6: GETBAT (DBAT6, u, l); break; + case 7: GETBAT (DBAT7, u, l); break; + default: /* should never get here anyways */ + return -1; + } + } + if (pu) { + *pu = u; + } + if (pl) { + *pl = l; + } + + if (!pu && !pl) { + /* dump */ + ubat b; + b.words.u = u; + b.words.l = l; + printk ("Raw %cBAT %i contents; UPPER: (0x%08lx)", (TYP_I == typ ? 'I' : 'D'), idx, u); + printk (" BEPI: 0x%08x", b.bat.batu.bepi); + printk (" BL: 0x%08lx", (u >> 2) & ((1 << 15) - 1)); + printk (" VS: 0b%i", b.bat.batu.vs); + printk (" VP: 0b%i", b.bat.batu.vp); + printk ("\n"); + printk (" LOWER: (0x%08lx)", l); + printk (" RPN: 0x%08x", b.bat.batl.brpn); + printk (" wimg: 0b%1i%1i%1i%1i", b.bat.batl.w, b.bat.batl.i, + b.bat.batl.m, b.bat.batl.g); + printk (" PP: 0x%1x", b.bat.batl.pp); + printk ("\n"); + printk ("Covering EA Range: "); + if (bat_in_use[typ] & (1 << idx)) + printk ("0x%08lx .. 
0x%08lx\n", bat_addrs[typ][idx].start, + bat_addrs[typ][idx].limit); + else + printk ("<none> (BAT off)\n"); + + } + return u; +} + +int +setdbat (int bat_index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + return setbat(TYP_D, bat_index, virt, phys, size, flags); +} + +int +setibat (int bat_index, unsigned long virt, unsigned long phys, + unsigned int size, int flags) +{ + return setbat(TYP_I, bat_index, virt, phys, size, flags); +} + +int +getdbat (int idx, unsigned long *pu, unsigned long *pl) +{ + return getbat (TYP_D, idx, pu, pl); +} + +int +getibat (int idx, unsigned long *pu, unsigned long *pl) +{ + return getbat (TYP_I, idx, pu, pl); +} diff --git a/bsps/powerpc/shared/mmu/e500-mmu.c b/bsps/powerpc/shared/mmu/e500-mmu.c new file mode 100644 index 0000000000..15fe88f91a --- /dev/null +++ b/bsps/powerpc/shared/mmu/e500-mmu.c @@ -0,0 +1,631 @@ +/* + * Routines to manipulate e500 TLBs; TLB0 (fixed 4k page size) + * is not very useful so we mostly focus on TLB1 (variable page size). + * + * TLB0's 256 entries are 2-way set associative which means that + * only 2 entries for page index numbers with matching 7 LSBs + * are available. + * + * E.g., look at EA = 0xAAAyy000. 0xAAAyy is the page index. + * + * The least-significant 7 bits in 'yy' determine the 'way' + * in the TLB 0 array. At most two EAs with matching 'yy' bits + * (the 7 LSBs, that is) can be mapped with TLB0 since there + * are only two entries per 'way'. + * + * Since this is a real-time OS we want to stay away from + * software TLB replacement. + */ + +/* + * Authorship + * ---------- + * This software was created by + * Till Straumann <strauman@slac.stanford.edu>, 2005-2007, + * Stanford Linear Accelerator Center, Stanford University. 
+ * + * Acknowledgement of sponsorship + * ------------------------------ + * This software was produced by + * the Stanford Linear Accelerator Center, Stanford University, + * under Contract DE-AC03-76SFO0515 with the Department of Energy. + * + * Government disclaimer of liability + * ---------------------------------- + * Neither the United States nor the United States Department of Energy, + * nor any of their employees, makes any warranty, express or implied, or + * assumes any legal liability or responsibility for the accuracy, + * completeness, or usefulness of any data, apparatus, product, or process + * disclosed, or represents that its use would not infringe privately owned + * rights. + * + * Stanford disclaimer of liability + * -------------------------------- + * Stanford University makes no representations or warranties, express or + * implied, nor assumes any liability for the use of this software. + * + * Stanford disclaimer of copyright + * -------------------------------- + * Stanford University, owner of the copyright, hereby disclaims its + * copyright and all other rights in this software. Hence, anyone may + * freely use it for any purpose without restriction. + * + * Maintenance of notices + * ---------------------- + * In the interest of clarity regarding the origin and status of this + * SLAC software, this and all the preceding Stanford University notices + * are to remain affixed to any copy or derivative of this software made + * or distributed by the recipient and are to be affixed to any copy of + * software made or distributed by the recipient that contains a copy or + * derivative of this software. + * + * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03 + */ + +/* 8450 MSR definitions; note that there are *substantial* differences + * compared to classic powerpc; in particular, IS/DS are *different* + * from IR/DR; the e500 MMU can not be switched off! 
+ * + * Also: To disable/enable all external interrupts, CE and EE must both be + * controlled. + */ +#include <rtems.h> +#include <rtems/bspIo.h> +#include <inttypes.h> +#include <stdio.h> + +#include <libcpu/e500_mmu.h> + +#define TLBIVAX_TLBSEL (1<<(63-60)) +#define TLBIVAX_INV_ALL (1<<(63-61)) + +#define E500_TLB_ATTR_WIMGE(x) ((x)&0x7f) /* includes user bits */ +#define E500_TLB_ATTR_WIMGE_GET(x) ((x)&0x7f) +#define E500_TLB_ATTR_TS (1<<7) +#define E500_TLB_ATTR_PERM(x) (((x)&0x3ff)<<8) +#define E500_TLB_ATTR_PERM_GET(x) (((x)>>8)&0x3ff) +#define E500_TLB_ATTR_TID(x) (((x)&0xfff)<<20) +#define E500_TLB_ATTR_TID_GET(x) (((x)>>20)&0xfff) + + +#ifdef DEBUG +#define STATIC +#else +#define STATIC static +#endif + +/* Factory to generate inline macros for accessing the MAS registers */ +#define __RDWRMAS(mas,rmas) \ + static inline uint32_t _read_MAS##mas(void) \ + { uint32_t x; __asm__ volatile("mfspr %0, %1": "=r"(x):"i"(rmas)); return x; } \ + static inline void _write_MAS##mas(uint32_t x) \ + { __asm__ volatile("mtspr %1, %0":: "r"(x),"i"(rmas)); } + +__RDWRMAS(0,FSL_EIS_MAS0) +__RDWRMAS(1,FSL_EIS_MAS1) +__RDWRMAS(2,FSL_EIS_MAS2) +__RDWRMAS(3,FSL_EIS_MAS3) +__RDWRMAS(4,FSL_EIS_MAS4) +__RDWRMAS(6,FSL_EIS_MAS6) + +#undef __RDWRMAS + +static int initialized = 0; + +E500_tlb_va_cache_t rtems_e500_tlb_va_cache[16]; + +/* Since it is likely that these routines are used during + * early initialization when stdio is not available yet + * we provide a helper that resorts to 'printk()' + */ +static void +myprintf(FILE *f, char *fmt, ...) +{ + va_list ap; + va_start(ap, fmt); + + if (!f || !_impure_ptr->__sdidinit) { + /* + * Might be called at an early stage when + * stdio is not yet initialized. 
+ */ + vprintk(fmt,ap); + } else { + vfprintf(f,fmt,ap); + } + va_end(ap); +} + + +void +rtems_e500_dmptlbc(FILE *f) +{ +int i; + if ( !initialized ) { + myprintf(stderr,"TLB cache not initialized\n"); + return; + } + for ( i=0; i<16; i++ ) { + if ( !rtems_e500_tlb_va_cache[i].att.v ) + continue; + myprintf(f,"#%2i: TID 0x%03x, TS %i, ea 0x%08x .. 0x%08x\n", + i, + rtems_e500_tlb_va_cache[i].va.va_tid, + rtems_e500_tlb_va_cache[i].att.ts, + rtems_e500_tlb_va_cache[i].va.va_epn<<12, + (rtems_e500_tlb_va_cache[i].va.va_epn<<12) + (1024<<(2*rtems_e500_tlb_va_cache[i].att.sz))-1); + myprintf(f,"PA 0x%08"PRIx32", PERM 0x%03x, WIMGE 0x%02x\n", + rtems_e500_tlb_va_cache[i].rpn<<12, + rtems_e500_tlb_va_cache[i].att.perm, + rtems_e500_tlb_va_cache[i].att.wimge); + } +} + +#define E500_SELTLB_1 0x1000 + +static void seltlb(rtems_e500_tlb_idx key) +{ +int idx = key & ~E500_SELTLB_1; + + if ( key & E500_SELTLB_1 ) { + _write_MAS0( FSL_EIS_MAS0_TLBSEL | FSL_EIS_MAS0_ESEL(idx) ); + } else { + _write_MAS0( (idx & 128) ? FSL_EIS_MAS0_ESEL(1) : FSL_EIS_MAS0_ESEL(0) ); + _write_MAS2( FSL_EIS_MAS2_EPN( idx & 127 ) ); + } +} + +/* + * Read a TLB entry from the hardware; if it is a TLB1 entry + * then the current settings are stored in the + * rtems_e500_tlb_va_cache[] structure. + * + * The routine can perform this operation quietly or + * print information to a file. + * + * 'sel': which TLB array to use; TLB0 (4k) if zero, + * TLB1 (variable) if nonzero. + * 'idx': which TLB entry to access. + * 'quiet': perform operation silently (no info printed) + * if nonzero. + * 'f': open FILE where to print information. May be + * NULL in which case 'stdout' is used. 
+ * + * RETURNS: + * 0: success; TLB entry is VALID + * +1: success but TLB entry is INVALID + * < 0: error (-1: invalid argument) + */ +int +rtems_e500_prtlb(rtems_e500_tlb_idx key, int quiet, FILE *f) +{ + uint32_t mas1, mas2, mas3; + rtems_interrupt_level lvl; + E500_tlb_va_cache_t *tlb; + E500_tlb_va_cache_t buf; + int sel, idx; + + sel = (key & E500_SELTLB_1) ? 1 : 0; + idx = key & ~E500_SELTLB_1; + + if ( idx < 0 || idx > 255 || ( idx > 15 && sel ) ) + return -1; + + rtems_interrupt_disable(lvl); + + seltlb( key ); + + asm volatile("tlbre"); + + /* not manipulating MAS0, skip reading it */ + mas1 = _read_MAS1(); + mas2 = _read_MAS2(); + mas3 = _read_MAS3(); + + rtems_interrupt_enable(lvl); + + tlb = sel ? rtems_e500_tlb_va_cache + idx : &buf; + + if ( (tlb->att.v = (FSL_EIS_MAS1_V & mas1) ? 1 : 0) ) { + tlb->va.va_epn = FSL_EIS_MAS2_EPN_GET(mas2); + tlb->rpn = FSL_EIS_MAS3_RPN_GET(mas3); + tlb->va.va_tid = FSL_EIS_MAS1_TID_GET(mas1); + tlb->att.ts = (FSL_EIS_MAS1_TS & mas1) ? 1 : 0; + tlb->att.sz = sel ? FSL_EIS_MAS1_TSIZE_GET(mas1) : 1 /* 4k size */; + tlb->att.wimge = FSL_EIS_MAS2_ATTR_GET(mas2); + tlb->att.perm = FSL_EIS_MAS3_PERM_GET(mas3); + } + + if ( tlb->att.v ) { + if ( !quiet ) { +/* + "TLB[1] Entry # 0 spans EA range 0x00000000 .. 0x00000000 + "Mapping: VA [TS 0/TID 0x00/EPN 0x00000] -> RPN 0x00000" + "Size: TSIZE 0x0 ( 4^ts KiB = 000000 KiB = 0x00000000 B) + "Attributes: PERM 0x000 (ux/sx/uw/sw/ur/sr) WIMGE 0x00 IPROT 0" +*/ + myprintf(f, + "TLB[%i] Entry # %d spans EA range 0x%08x .. 
0x%08x\r\n", + sel, + idx, + (tlb->va.va_epn << 12), + (tlb->va.va_epn << 12) + (1024<<(2*tlb->att.sz)) - 1 + ); + + myprintf(f, + "Mapping: VA [TS %d/TID 0x%02x/EPN 0x%05x] -> RPN 0x%05"PRIx32"\r\n", + tlb->att.ts, tlb->va.va_tid, tlb->va.va_epn, tlb->rpn + ); + myprintf(f, + "Size: TSIZE 0x%x ( 4^ts KiB = %6d KiB = 0x%08x B)\r\n", + tlb->att.sz, (1<<(2*tlb->att.sz)), (1024<<(2*tlb->att.sz)) + ); + myprintf(f, + "Attributes: PERM 0x%03x (ux/sx/uw/sw/ur/sr) WIMGE 0x%02x IPROT %i\r\n", + tlb->att.perm, tlb->att.wimge, (sel && (mas1 & FSL_EIS_MAS1_IPROT) ? 1 : 0) + ); + myprintf(f, + "EA range 0x%08x .. 0x%08x\r\n", + (tlb->va.va_epn << 12), + (tlb->va.va_epn << 12) + (1024<<(2*tlb->att.sz)) - 1 + ); + } + } else { + if ( !quiet ) { + myprintf(f, "TLB[%i] Entry #%i <OFF> (size 0x%x = 0x%xb)\n", sel, idx, tlb->att.sz, (1024<<(2*tlb->att.sz))); + } + return 1; + } + return 0; +} + +/* Initialize cache; verify that TLB0 is unused; + * + * RETURNS: zero on success, nonzero on error (TLB0 + * seems to be in use); in this case the + * driver will refuse to change TLB1 entries + * (other than disabling them). + */ +int rtems_e500_initlb() +{ +int i; +int rval = 0; + for (i=0; i<16; i++) + rtems_e500_prtlb(E500_SELTLB_1 | i, 1, 0); + for (i=0; i<256; i++) { + /* refuse to enable operations that change TLB entries + * if anything in TLB[0] is valid (because we currently + * don't check against overlap with TLB[0] when we + * write a new entry). + */ + if ( rtems_e500_prtlb(E500_SELTLB_0 | i, 1, 0) <=0 ) { + myprintf(stderr,"WARNING: 4k TLB #%i seems to be valid; UNSUPPORTED configuration\n", i); + rval = -1; + } + } + if ( !rval ) + initialized = 1; + return rval; +} + +/* + * Write TLB1 entry (can also be used to disable an entry). + * + * The routine checks against the cached data in + * rtems_e500_tlb_va[] to prevent the user from generating + * overlapping entries. 
+ * + * 'idx': TLB 1 entry # to manipulate + * 'ea': Effective address (must be page aligned) + * 'pa': Physical address (must be page aligned) + * 'sz': Page size selector; page size is + * 1024 * 2^(2*sz) bytes. + * 'sz' may also be one of the following: + * - page size in bytes ( >= 1024 ); the selector + * value is then computed by this routine. + * However, 'sz' must be a valid page size + * or -1 will be returned. + * - a value < 0 to invalidate/disable the + * TLB entry. + * 'attr': Page attributes; ORed combination of WIMGE, + * PERMissions, TID and TS. Use ATTR_xxx macros + * + * RETURNS: 0 on success, nonzero on error: + * + * >0: requested mapping would overlap with + * existing mapping in other entry. Return + * value gives conflicting entry + 1; i.e., + * if a value of 4 is returned then the request + * conflicts with existing mapping in entry 3. + * -1: invalid argument + * -3: driver not initialized (or initialization + * failed because TLB0 is in use). + * <0: other error + * + */ +#define E500_TLB_ATTR_WIMGE(x) ((x)&0x7f) /* includes user bits */ +#define E500_TLB_ATTR_WIMGE_GET(x) ((x)&0x7f) +#define E500_TLB_ATTR_TS (1<<7) +#define E500_TLB_ATTR_PERM(x) (((x)&0x3ff)<<8) +#define E500_TLB_ATTR_PERM_GET(x) (((x)>>8)&0x3ff) +#define E500_TLB_ATTR_TID(x) (((x)&0xfff)<<20) +#define E500_TLB_ATTR_TID_GET(x) (((x)>>20)&0xfff) + +int +rtems_e500_wrtlb(int idx, uint32_t ea, uint32_t pa, int sz, uint32_t attr) +{ +uint32_t mas1, mas2, mas3, mas4; +uint32_t tid, msk; +int lkup; +rtems_interrupt_level lvl; + + if ( sz >= 1024 ) { + /* Assume they literally specify a size */ + msk = sz; + sz = 0; + while ( msk != (1024<<(2*sz)) ) { + if ( ++sz > 15 ) { + return -1; + } + } + /* OK, acceptable */ + } + + msk = sz > 0 ? 
(1024<<(2*sz)) - 1 : 0; + + if ( !initialized && sz > 0 ) { + myprintf(stderr,"TLB driver not initialized; refuse to enable any entry\n"); + return -3; + } + + if ( (ea & msk) || (pa & msk) ) { + myprintf(stderr,"Misaligned ea or pa\n"); + return -1; + } + + if ( idx < 0 || idx > 15 ) + return -1; + + if ( sz > 15 ) { + /* but e500v1 doesn't support all 16 sizes!! */ + /* FIXME: we should inquire about this CPU's + * capabilities... + */ + return -1; + } + + tid = E500_TLB_ATTR_TID_GET(attr); + + mas1 = (attr & E500_TLB_ATTR_TS) ? FSL_EIS_MAS1_TS : 0; + + if ( sz >=0 ) { + lkup = rtems_e500_matchtlb(ea, tid, mas1, sz); + + if ( lkup < -1 ) { + /* some error */ + return lkup; + } + + if ( lkup >= 0 && lkup != idx ) { + myprintf(stderr,"TLB[1] #%i overlaps with requested mapping\n", lkup); + rtems_e500_prtlb( E500_SELTLB_1 | lkup, 0, stderr); + return lkup+1; + } + } + + /* OK to proceed */ + mas1 |= FSL_EIS_MAS1_IPROT | FSL_EIS_MAS1_TID(tid); + + if ( sz >= 0 ) + mas1 |= FSL_EIS_MAS1_V | FSL_EIS_MAS1_TSIZE(sz); + + mas2 = FSL_EIS_MAS2_EPN( ea>>12 ) | E500_TLB_ATTR_WIMGE(attr); + mas3 = FSL_EIS_MAS3_RPN( pa>>12 ) | E500_TLB_ATTR_PERM_GET(attr); + /* mas4 is not really relevant; we don't use TLB replacement */ + mas4 = FSL_EIS_MAS4_TLBSELD | FSL_EIS_MAS4_TIDSELD(0) | FSL_EIS_MAS4_TSIZED(9) | FSL_EIS_MAS4_ID | FSL_EIS_MAS4_GD; + + rtems_interrupt_disable(lvl); + + seltlb(idx | E500_SELTLB_1); + + _write_MAS1(mas1); + _write_MAS2(mas2); + _write_MAS3(mas3); + _write_MAS4(mas4); + + asm volatile( + " sync\n" + " isync\n" + " tlbwe\n" + " sync\n" + " isync\n" + ); + + rtems_interrupt_enable(lvl); + + /* update cache */ + rtems_e500_prtlb( E500_SELTLB_1 | idx, 1, 0); + + return 0; +} + +/* + * Check if a ts/tid/ea/sz mapping overlaps + * with an existing entry. + * + * ASSUMPTION: all TLB0 (fixed 4k pages) are invalid and always unused. + * + * NOTE: 'sz' is the 'logarithmic' size selector; the page size + * is 1024*2^(2*sz). 
+ * + * RETURNS: + * >= 0: index of TLB1 entry that already provides a mapping + * which overlaps within the ea range. + * -1: SUCCESS (no conflicting entry found) + * <=-2: ERROR (invalid input) + */ +int rtems_e500_matchtlb(uint32_t ea, uint32_t tid, int ts, int sz) +{ +int i; +uint32_t m,a; +E500_tlb_va_cache_t *tlb; + + if ( sz < 0 || sz > 15 ) + return -4; + + sz = (1024<<(2*sz)); + + if ( !initialized ) { + /* cache not initialized */ + return -3; + } + + if ( ea & (sz-1) ) { + /* misaligned ea */ + return -2; + } + + if ( ts ) + ts = 1; + + for ( i=0, tlb=rtems_e500_tlb_va_cache; i<16; i++, tlb++ ) { + if ( ! tlb->att.v ) + continue; + if ( tlb->att.ts != ts ) + continue; + if ( tlb->va.va_tid && tlb->va.va_tid != tid ) + continue; + /* TID and TS match a valid entry */ + m = (1024<<(2*tlb->att.sz)) - 1; + /* calculate starting address of this entry */ + a = tlb->va.va_epn<<12; + if ( ea <= a + m && ea + sz -1 >= a ) { + /* overlap */ + return i; + } + } + return -1; +} + +/* Find TLB index that maps 'ea/as' combination + * + * RETURNS: index 'key'; i.e., the index number plus + * a bit (E500_SELTLB_1) which indicates whether + * the mapping was found in TLB0 (4k fixed page + * size) or in TLB1 (variable page size). + * + * On error (no mapping) -1 is returned. + */ +rtems_e500_tlb_idx +rtems_e500_ftlb(uint32_t ea, int as) +{ +uint32_t pid, mas0, mas1; +int i, rval = -1; +rtems_interrupt_level lvl; + + rtems_interrupt_disable(lvl); + + for ( i=0; i<3; i++ ) { + switch (i) { + case 0: asm volatile("mfspr %0, %1":"=r"(pid):"i"(FSL_EIS_PID0)); break; + case 1: asm volatile("mfspr %0, %1":"=r"(pid):"i"(FSL_EIS_PID1)); break; + case 2: asm volatile("mfspr %0, %1":"=r"(pid):"i"(FSL_EIS_PID2)); break; + default: + goto bail; + } + + _write_MAS6( FSL_EIS_MAS6_SPID0(pid) | (as ? 
FSL_EIS_MAS6_SAS : 0 ) ); + + asm volatile("tlbsx 0, %0"::"r"(ea)); + + mas1 = _read_MAS1(); + + if ( (FSL_EIS_MAS1_V & mas1) ) { + mas0 = _read_MAS0(); + if ( FSL_EIS_MAS0_TLBSEL & mas0 ) { + /* TLB1 */ + rval = FSL_EIS_MAS0_ESEL_GET(mas0) | E500_SELTLB_1; + } else { + rval = (ea >> (63-51)) | (( FSL_EIS_MAS0_NV & mas0 ) ? 180 : 0 ) ; + } + break; + } + } + +bail: + rtems_interrupt_enable(lvl); + return rval; +} + +/* Mark TLB entry as invalid ('disabled'). Unlike + * rtems_e500_wrtlb() with a negative size argument + * this routine also can disable TLB0 entries. + * + * 'key': TLB entry (index) ORed with selector bit + * (0 for TLB0, E500_SELTLB_1 for TLB1). + * + * RETURNS: zero on success, nonzero on error (TLB + * unchanged). + * + * NOTE: If a TLB1 entry is disabled the associated + * entry in rtems_e500_va_cache[] is also + * marked as disabled. + */ +int +rtems_e500_clrtlb(rtems_e500_tlb_idx key) +{ +rtems_e500_tlb_idx k0; +rtems_interrupt_level lvl; + + /* minimal guard against bad key */ + if ( key < 0 ) + return -1; + + if ( (key & E500_SELTLB_1) ) { + if ( (key & ~E500_SELTLB_1) > 15 ) { + myprintf(stderr,"Invalid TLB index; TLB1 index must be < 16\n"); + return -1; + } + } else if ( key > 255 ) { + myprintf(stderr,"Invalid TLB index; TLB0 index must be < 256\n"); + return -1; + } + + /* Must not invalidate page 0 which holds vectors, text etc... */ + k0 = rtems_e500_ftlb(0, 0); + if ( -1 == k0 ) { + myprintf(stderr,"tlbivax; something's fishy - I don't find mapping for addr. 
0\n"); + return -1; + } + + /* NOTE: we assume PID is ignored, and AS is 0 */ + if ( k0 == key ) { + myprintf(stderr,"Refuse to invalidate page holding addr 0 (always needed)\n"); + return -1; + } + + rtems_interrupt_disable(lvl); + + seltlb(key); + + asm volatile("tlbre"); + + /* read old entries */ + _write_MAS1( _read_MAS1() & ~FSL_EIS_MAS1_V ); + + asm volatile( + " sync\n" + " isync\n" + " tlbwe\n" + " sync\n" + " isync\n" + ); + + /* update cache */ + if ( E500_SELTLB_1 & key ) + rtems_e500_tlb_va_cache[ (~E500_SELTLB_1 & key) ].att.v = 0; + + rtems_interrupt_enable(lvl); + + return 0; +} diff --git a/bsps/powerpc/shared/mmu/mmuAsm.S b/bsps/powerpc/shared/mmu/mmuAsm.S new file mode 100644 index 0000000000..e64a5dfe89 --- /dev/null +++ b/bsps/powerpc/shared/mmu/mmuAsm.S @@ -0,0 +1,530 @@ +/* + * mmuAsm.S + * + * Copyright (C) 1999 Eric Valette (valette@crf.canon.fr) + * + * This file contains the low-level support for various MMU + * features. + * + * The license and distribution terms for this file may be + * found in the file LICENSE in this distribution or at + * http://www.rtems.org/license/LICENSE. + * + * T. Straumann - 11/2001: added support for 7400 (no AltiVec yet) + * S.K. 
Feng - 10/2003: added support for 7455 (no AltiVec yet) + * + */ + +#include <rtems/asm.h> +#include <rtems/score/cpu.h> +#include <libcpu/io.h> +#include <libcpu/bat.h> + +/* Unfortunately, the CPU types defined in cpu.h are + * an 'enum' type and hence not available :-( + */ +#define PPC_601 0x1 +#define PPC_603 0x3 +#define PPC_604 0x4 +#define PPC_603e 0x6 +#define PPC_603ev 0x7 +#define PPC_750 0x8 +#define PPC_604e 0x9 +#define PPC_604r 0xA +#define PPC_7400 0xC +#define PPC_7455 0x8001 +#define PPC_7457 0x8002 +#define PPC_620 0x16 +#define PPC_860 0x50 +#define PPC_821 PPC_860 +#define PPC_8260 0x81 +#define PPC_8240 PPC_8260 + +/* ALTIVEC instructions (not recognized by off-the shelf gcc yet) */ +#define DSSALL .long 0x7e00066c /* DSSALL altivec instruction opcode */ + +/* A couple of defines to make the code more readable */ +#define CACHE_LINE_SIZE 32 + +#ifndef MSSCR0 +#define MSSCR0 1014 +#endif + +#define DL1HWF (1<<(31-8)) +#define L2HWF (1<<(31-20)) + +#FIXME Should really move this to C code + + .globl L1_caches_enables + .type L1_caches_enables, @function + +L1_caches_enables: + /* + * Enable caches and 604-specific features if necessary. + */ + mfspr r9,PPC_PVR + rlwinm r9,r9,16,16,31 + cmpi 0,r9,PPC_601 + beq 4f /* not needed for 601 */ + mfspr r11,HID0 + andi. 
r0,r11,HID0_DCE + ori r11,r11,HID0_ICE|HID0_DCE + ori r8,r11,HID0_ICFI + bne 3f /* don't invalidate the D-cache */ + ori r8,r8,HID0_DCI /* unless it wasn't enabled */ +3: + sync + mtspr HID0,r8 /* enable and invalidate caches */ + sync + mtspr HID0,r11 /* enable caches */ + sync + isync + cmpi 1,r9,PPC_604 /* check for 604 */ + cmpi 2,r9,PPC_604e /* or 604e */ + cmpi 3,r9,PPC_604r /* or mach5 */ + cror 6,6,10 + cror 6,6,14 + cmpi 2,r9,PPC_750 /* or 750 */ + cror 6,6,10 + cmpi 2,r9,PPC_7400 /* or 7400 */ + cror 6,6,10 + cmpli 0,r9,PPC_7455 /* or 7455 */ + beq 1f + cmpli 0,r9,PPC_7457 /* or 7457 */ + bne 2f +1: + /* 7455:link register stack,branch folding & + * TBEN : enable the time base and decrementer. + * EMCP bit is defined in HID1. However, it's not used + * in mvme5500 board because of GT64260 (e.g. it's connected + * pull-up). + */ + oris r11,r11,(HID0_LRSTK|HID0_FOLD|HID0_TBEN)@h + ori r11,r11,(HID0_LRSTK|HID0_FOLD|HID0_TBEN)@l +2: cror 2,2,10 + bne 3f + ori r11,r11,HID0_BTIC /* enable branch tgt cache on 7400 , 7455 , 7457 */ +3: cror 2,2,6 + bne 4f + /* on 7400 SIED is actually SGE (store gathering enable) */ + ori r11,r11,HID0_SIED|HID0_BHTE /* for 604[e], enable */ + bne 2,5f + ori r11,r11,HID0_BTCD +5: mtspr HID0,r11 /* superscalar exec & br history tbl */ + sync /* for SGE bit */ + isync /* P2-17 to 2-22 in MPC7450UM */ +4: + blr + + .globl get_L1CR +.type get_L1CR, @function +get_L1CR: + mfspr r3,HID0 + blr + + .globl get_L2CR + .type get_L2CR, @function +get_L2CR: + /* Make sure this is a > 750 chip */ + mfspr r3,PPC_PVR + rlwinm r3,r3,16,16,31 + cmplwi r3,PPC_750 /* it's a 750 */ + beq 1f + cmplwi r3,PPC_7400 /* it's a 7400 */ + beq 1f + cmplwi r3,PPC_7455 /* it's a 7455 */ + beq 1f + cmplwi r3,PPC_7457 /* it's a 7457 */ + beq 1f + li r3,-1 + blr + +1: + /* Return the L2CR contents */ + mfspr r3,L2CR + blr + + .globl set_L2CR + .type set_L2CR, @function +set_L2CR: + /* Usage: + * When setting the L2CR register, you must do a few special things. 
+ * If you are enabling the cache, you must perform a global invalidate. + * If you are disabling the cache, you must flush the cache contents first. + * This routine takes care of doing these things. When first + * enabling the cache, make sure you pass in the L2CR you want, as well as + * passing in the global invalidate bit set. A global invalidate will + * only be performed if the L2I bit is set in applyThis. When enabling + * the cache, you should also set the L2E bit in applyThis. If you + * want to modify the L2CR contents after the cache has been enabled, + * the recommended procedure is to first call set_L2CR(0) to disable + * the cache and then call it again with the new values for L2CR. Examples: + * + * set_L2CR(0) - disables the cache + * set_L2CR(0xb9A14000) - enables my G3 MCP750 card: + * - L2E set to turn on the cache + * - L2SIZ set to 1MB + * - L2CLK set to %2 + * - L2RAM set to pipelined synchronous late-write + * - L2I set to perform a global invalidation + * - L2OH set to 1 nS + * + * A similar call should work for your card. You need to know the correct + * setting for your card and then place them in the fields I have outlined + * above. Other fields support optional features, such as L2DO which caches + * only data, or L2TS which causes cache pushes from the L1 cache to go to + * the L2 cache instead of to main memory. + */ + + /* Make sure this is a 750, 7400, 7455 or 7457 chip */ + mfspr r0,PPC_PVR + rlwinm r0,r0,16,16,31 + cmplwi r0,PPC_750 + beq thisIs750 + cmplwi r0,PPC_7400 + beq thisIs750 + cmplwi r0,PPC_7455 + beq thisIs750 + cmplwi r0,PPC_7457 + beq thisIs750 + li r3,-1 + blr + +thisIs750: + /* Get the current enable bit of the L2CR into r4 */ + mfspr r4,L2CR + rlwinm r4,r4,0,0,0 + + /* See if we want to perform a global inval this time. */ + rlwinm r6,r3,0,10,10 /* r6 contains the new invalidate bit */ + rlwinm. 
r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,1,31 /* Turn off the enable bit */ + or r3,r3,r4 /* Keep the enable bit the same as it was for now. */ + mfmsr r7 /* shut off interrupts around critical flush/invalidate sections */ + rlwinm r4,r7,0,17,15 /* Turn off EE bit - an external exception while we are flushing + the cache is fatal (comment this line and see!) */ + mtmsr r4 + bne dontDisableCache /* Only disable the cache if L2CRApply has the enable bit off */ + + cmplwi r0,PPC_7400 /* 7400 ? */ + bne disableCache /* use traditional method */ + + /* On the 7400, they recommend using the hardware flush feature */ + DSSALL /* stop all data streams */ + sync + /* we wouldn't have to flush L1, but for sake of consistency with the other code we do it anyway */ + mfspr r4, MSSCR0 + oris r4, r4, DL1HWF@h + mtspr MSSCR0, r4 + sync + /* L1 flushed */ + mfspr r4, L2CR + ori r4, r4, L2HWF + mtspr L2CR, r4 + sync + /* L2 flushed */ + b flushDone + +disableCache: + /* Disable the cache. First, we turn off data relocation. */ + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + cmplwi r0,PPC_7455 /* 7455 ? */ + beq 1f + cmplwi r0,PPC_7457 /* 7457 ? 
*/ + bne not745x +1: + /* 745x:L1 Load/Flush, L2, L3 : hardware flush */ + DSSALL + mtmsr r4 + sync + isync + mfspr r4, MSSCR0 + rlwinm r4,r4,0,29,0 /* Turn off the L2PFE bits */ + mtspr MSSCR0, r4 + sync + /* flush L1 first */ + lis r4,0x0001 + mtctr r4 + li r4,0 + li r0,0 +loadFlush: + lwzx r0,r0,r4 + dcbf r0,r4 + addi r4,r4,CACHE_LINE_SIZE /* Go to start of next cache line */ + bdnz loadFlush + sync + /* Set the L2CR[L2IO & L2DO] bits to completely lock the L2 cache */ + mfspr r0, L2CR + lis r4,L2CR_LOCK_745x@h + ori r4,r4,L2CR_LOCK_745x@l + or r4,r0,r4 + rlwinm r4,r4,0,11,9 /* make sure the invalidate bit off */ + mtspr L2CR, r4 + sync + ori r4, r4, L2HWF + mtspr L2CR, r4 + sync + /* L2 flushed,L2IO & L2DO got cleared in the dontDisableCache: */ + b reenableDR + +not745x: + sync + mtmsr r4 + isync + /* + Now, read the first 2MB of memory to put new data in the cache. + (Actually we only need the size of the L2 cache plus + the size of the L1 cache, but 2MB will cover everything just to be safe). + */ + lis r4,0x0001 + mtctr r4 + li r4,0 +loadLoop: + lwzx r0,r0,r4 + addi r4,r4,CACHE_LINE_SIZE /* Go to start of next cache line */ + bdnz loadLoop + + /* Now, flush the first 2MB of memory */ + lis r4,0x0001 + mtctr r4 + li r4,0 + sync +flushLoop: + dcbf r0,r4 + addi r4,r4,CACHE_LINE_SIZE /* Go to start of next cache line */ + bdnz flushLoop +reenableDR: + rlwinm r4,r7,0,17,15 /* still mask EE but reenable data relocation */ + sync + mtmsr r4 + isync + +flushDone: + + /* Turn off the L2CR enable bit. */ + rlwinm r3,r3,0,1,31 + +dontDisableCache: + /* Set up the L2CR configuration bits */ + sync + mtspr L2CR,r3 + sync + cmplwi r6,0 + beq noInval + + /* Perform a global invalidation */ + oris r3,r3,0x0020 + sync + mtspr L2CR,r3 + sync +invalCompleteLoop: /* Wait for the invalidation to complete */ + mfspr r3,L2CR + rlwinm. 
r4,r3,0,31,31 + bne invalCompleteLoop + + rlwinm r3,r3,0,11,9; /* Turn off the L2I bit */ + sync + mtspr L2CR,r3 + +noInval: + sync + /* re-enable interrupts, i.e. restore original MSR */ + mtmsr r7 /* (no sync needed) */ + /* See if we need to enable the cache */ + cmplwi r5,0 + beqlr + +enableCache: + /* Enable the cache */ + oris r3,r3,0x8000 + mtspr L2CR,r3 + sync + blr + + + .globl get_L3CR + .type get_L3CR, @function +get_L3CR: + /* Make sure this is a 7455 chip */ + mfspr r3,PPC_PVR + rlwinm r3,r3,16,16,31 + cmplwi r3,PPC_7455 /* it's a 7455 */ + beq 1f + cmplwi r3,PPC_7457 /* it's a 7457 */ + beq 1f + li r3,-1 + blr + +1: + /* Return the L3CR contents */ + mfspr r3,L3CR + blr + + .globl set_L3CR + .type set_L3CR, @function +set_L3CR: + /* Usage: + * When setting the L3CR register, you must do a few special things. + * If you are enabling the cache, you must perform a global invalidate. + * Then call cpu_enable_l3cr(l3cr). + * If you are disabling the cache, you must flush the cache contents first. + * This routine takes care of doing these things. If you + * want to modify the L3CR contents after the cache has been enabled, + * the recommended procedure is to first call __setL3CR(0) to disable + * the cache and then call cpu_enable_l3cr with the new values for + * L3CR. + */ + + /* Make sure this is a 7455 chip */ + mfspr r0,PPC_PVR + rlwinm r0,r0,16,16,31 + cmplwi r0,PPC_7455 + beq thisIs7455 + cmplwi r0,PPC_7457 + beq thisIs7455 + li r3,-1 + blr + +thisIs7455: + /* Get the current enable bit of the L3CR into r4 */ + mfspr r4,L3CR + rlwinm r4,r4,0,0,0 + + /* See if we want to perform a global inval this time. */ + rlwinm r6,r3,0,10,10 /* r6 contains the new invalidate bit */ + rlwinm. r5,r3,0,0,0 /* r5 contains the new enable bit */ + rlwinm r3,r3,0,11,9 /* Turn off the invalidate bit */ + rlwinm r3,r3,0,1,31 /* Turn off the enable bit */ + or r3,r3,r4 /* Keep the enable bit the same as it was for now. 
*/ + mfmsr r7 /* shut off interrupts around critical flush/invalidate sections */ + rlwinm r4,r7,0,17,15 /* Turn off EE bit - an external exception while we are flushing + the cache is fatal (comment this line and see!) */ + mtmsr r4 + bne dontDisableL3Cache /* Only disable the cache if L3CRApply has the enable bit off */ + /* Before the L3 is disabled, it must be flushed to prevent coherency problems */ + /* First, we turn off data relocation. */ + rlwinm r4,r4,0,28,26 /* Turn off DR bit */ + DSSALL + sync + mtmsr r4 + isync /* make sure memory accesses have completed */ + /* 7455: L3 : hardware flush + * Set the L3CR[L3IO & L3DO] bits to completely lock the L3 cache */ + mfspr r0, L3CR + lis r4, L3CR_LOCK_745x@h + ori r4,r4, L3CR_LOCK_745x@l + or r4,r0,r4 + rlwinm r4,r4,0,11,9 /* make sure the invalidate bit off */ + mtspr L3CR, r4 + sync + ori r4, r4, L3CR_L3HWF + mtspr L3CR, r4 + sync + /* L3 flushed,L3IO & L3DO got cleared in the dontDisableL3Cache: */ + rlwinm r4,r7,0,17,15 /* still mask EE but reenable data relocation */ + sync + mtmsr r4 + isync + + /* Turn off the L3CR enable bit. */ + rlwinm r3,r3,0,1,31 + +dontDisableL3Cache: + /* Set up the L3CR configuration bits */ + sync + mtspr L3CR,r3 + sync +ifL3Inval: + cmplwi r6,0 + beq noL3Inval + + /* Perform a global invalidation */ + oris r3,r3,0x0020 + sync + mtspr L3CR,r3 + sync +invalCompleteL3: /* Wait for the invalidation to complete */ + mfspr r3,L3CR + rlwinm. r4,r3,0,31,31 + bne invalCompleteL3 + + rlwinm r3,r3,0,11,9; /* Turn off the L3I bit */ + sync + mtspr L3CR,r3 + sync + +noL3Inval: + /* re-enable interrupts, i.e. restore original MSR */ + mtmsr r7 /* (no sync needed) */ + /* See if we need to enable the cache */ + cmplwi r5,0 + beqlr + +enableL3Cache: + /* Enable the cache */ + oris r3,r3,0x8000 + mtspr L3CR,r3 + sync + blr + +/* + * An undocumented "feature" of 604e requires that the v bit + * be cleared before changing BAT values. 
+ * + * Also, newer IBM firmware does not clear bat3 and 4 so + * this makes sure it's done. + * -- Cort + */ + .globl CPU_clear_bats_early + .type CPU_clear_bats_early,@function +CPU_clear_bats_early: + li r3,0 + mfspr r4,PPC_PVR + rlwinm r4,r4,16,16,31 /* r4 = 1 for 601, 4 for 604 */ + cmpwi r4, 1 + sync + isync + beq 1f + cmplwi r4,0x8001 /* 7445, 7455 (0x8001), 7447, 7457 (0x8002) */ + blt 2f /* 7447a (0x8003) and 7448 (0x8004) have 16 bats */ + cmplwi r4,0x8004 + bgt 2f + mtspr DBAT4U,r3 + mtspr DBAT4L,r3 + mtspr DBAT5U,r3 + mtspr DBAT5L,r3 + mtspr DBAT6U,r3 + mtspr DBAT6L,r3 + mtspr DBAT7U,r3 + mtspr DBAT7L,r3 + mtspr IBAT4U,r3 + mtspr IBAT4L,r3 + mtspr IBAT5U,r3 + mtspr IBAT5L,r3 + mtspr IBAT6U,r3 + mtspr IBAT6L,r3 + mtspr IBAT7U,r3 + mtspr IBAT7L,r3 +2: + mtspr DBAT0U,r3 + mtspr DBAT0L,r3 + mtspr DBAT1U,r3 + mtspr DBAT1L,r3 + mtspr DBAT2U,r3 + mtspr DBAT2L,r3 + mtspr DBAT3U,r3 + mtspr DBAT3L,r3 +1: + mtspr IBAT0U,r3 + mtspr IBAT0L,r3 + mtspr IBAT1U,r3 + mtspr IBAT1L,r3 + mtspr IBAT2U,r3 + mtspr IBAT2L,r3 + mtspr IBAT3U,r3 + mtspr IBAT3L,r3 + sync + isync + blr + diff --git a/bsps/powerpc/shared/mmu/pte121.c b/bsps/powerpc/shared/mmu/pte121.c new file mode 100644 index 0000000000..93ef909776 --- /dev/null +++ b/bsps/powerpc/shared/mmu/pte121.c @@ -0,0 +1,1139 @@ +/* + * Trivial page table setup for RTEMS + * Purpose: allow write protection of text/RO-data + */ + +/* + * Authorship + * ---------- + * This software was created by + * Till Straumann <strauman@slac.stanford.edu>, 4/2002, 2003, 2004, + * Stanford Linear Accelerator Center, Stanford University. + * + * Acknowledgement of sponsorship + * ------------------------------ + * This software was produced by + * the Stanford Linear Accelerator Center, Stanford University, + * under Contract DE-AC03-76SFO0515 with the Department of Energy. 
+ * + * Government disclaimer of liability + * ---------------------------------- + * Neither the United States nor the United States Department of Energy, + * nor any of their employees, makes any warranty, express or implied, or + * assumes any legal liability or responsibility for the accuracy, + * completeness, or usefulness of any data, apparatus, product, or process + * disclosed, or represents that its use would not infringe privately owned + * rights. + * + * Stanford disclaimer of liability + * -------------------------------- + * Stanford University makes no representations or warranties, express or + * implied, nor assumes any liability for the use of this software. + * + * Stanford disclaimer of copyright + * -------------------------------- + * Stanford University, owner of the copyright, hereby disclaims its + * copyright and all other rights in this software. Hence, anyone may + * freely use it for any purpose without restriction. + * + * Maintenance of notices + * ---------------------- + * In the interest of clarity regarding the origin and status of this + * SLAC software, this and all the preceding Stanford University notices + * are to remain affixed to any copy or derivative of this software made + * or distributed by the recipient and are to be affixed to any copy of + * software made or distributed by the recipient that contains a copy or + * derivative of this software. 
+ * + * ------------------ SLAC Software Notices, Set 4 OTT.002a, 2004 FEB 03 + */ + +/* Choose debugging options */ +#undef DEBUG_MAIN /* create a standalone (host) program for basic testing */ +#undef DEBUG /* target debugging and consistency checking */ +#undef DEBUG_EXC /* add exception handler which reenables BAT0 and recovers from a page fault */ + +#ifdef DEBUG_MAIN +#undef DEBUG /* must not use these together with DEBUG_MAIN */ +#undef DEBUG_EXC +#endif + +/***************************** INCLUDE HEADERS ****************************/ + +#ifndef DEBUG_MAIN +#include <rtems.h> +#include <rtems/bspIo.h> +#include <rtems/score/percpu.h> +#include <libcpu/cpuIdent.h> +#include <libcpu/spr.h> +#ifdef DEBUG_EXC +#include <bsp.h> +#include <bsp/vectors.h> +#endif +#endif + +#include <stdio.h> +#include <assert.h> +#include <string.h> + +#include <libcpu/pte121.h> + +/************************** CONSTANT DEFINITIONS **************************/ + +/* Base 2 logs of some sizes */ + +#ifndef DEBUG_MAIN + +#define LD_PHYS_SIZE 32 /* physical address space */ +#define LD_PG_SIZE 12 /* page size */ +#define LD_PTEG_SIZE 6 /* PTEG size */ +#define LD_PTE_SIZE 3 /* PTE size */ +#define LD_SEG_SIZE 28 /* segment size */ +#define LD_MIN_PT_SIZE 16 /* minimal size of a page table */ +#define LD_HASH_SIZE 19 /* length of a hash */ +#define LD_VSID_SIZE 24 /* vsid bits in seg. 
register */ + +#else /* DEBUG_MAIN */ + +/* Reduced 'fantasy' sizes for testing */ +#define LD_PHYS_SIZE 32 /* physical address space */ +#define LD_PG_SIZE 6 /* page size */ +#define LD_PTEG_SIZE 5 /* PTEG size */ +#define LD_PTE_SIZE 3 /* PTE size */ +#define LD_SEG_SIZE 28 /* segment size */ +#define LD_MIN_PT_SIZE 7 /* minimal size of a page table */ +#define LD_HASH_SIZE 19 /* length of a hash */ + +#endif /* DEBUG_MAIN */ + +/* Derived sizes */ + +/* Size of a page index */ +#define LD_PI_SIZE ((LD_SEG_SIZE) - (LD_PG_SIZE)) + +/* Number of PTEs in a PTEG */ +#define PTE_PER_PTEG (1<<((LD_PTEG_SIZE)-(LD_PTE_SIZE))) + +/* Segment register bits */ +#define KEY_SUP (1<<30) /* supervisor mode key */ +#define KEY_USR (1<<29) /* user mode key */ + +/* The range of effective addresses to scan with 'tlbie' + * instructions in order to flush all TLBs. + * On the 750 and 7400, there are 128 two way I and D TLBs, + * indexed by EA[14:19]. Hence calling + * tlbie rx + * where rx scans 0x00000, 0x01000, 0x02000, ... 0x3f000 + * is sufficient to do the job + */ +#define NUM_TLB_PER_WAY 64 /* 750 and 7400 have 128 two way TLBs */ +#define FLUSH_EA_RANGE (NUM_TLB_PER_WAY<<LD_PG_SIZE) + +/*************************** MACRO DEFINITIONS ****************************/ + +/* Macros to split a (32bit) 'effective' address into + * VSID (virtual segment id) and PI (page index) + * using a 1:1 mapping of 'effective' to 'virtual' + * addresses. 
+ * + * For 32bit addresses this looks like follows + * (each 'x' or '0' stands for a 'nibble' [4bits]): + * + * 32bit effective address (EA) + * + * x x x x x x x x + * | | + * 0 0 0 0 0 x|x x x x|x x x + * VSID | PI | PO (page offset) + * | | + */ +/* 1:1 VSID of an EA */ +#define VSID121(ea) (((ea)>>LD_SEG_SIZE) & ((1<<(LD_PHYS_SIZE-LD_SEG_SIZE))-1)) +/* page index of an EA */ +#define PI121(ea) (((ea)>>LD_PG_SIZE) & ((1<<LD_PI_SIZE)-1)) + +/* read VSID from segment register */ +#ifndef DEBUG_MAIN +static uint32_t +seg2vsid (uint32_t ea) +{ + __asm__ volatile ("mfsrin %0, %0":"=r" (ea):"0" (ea)); + return ea & ((1 << LD_VSID_SIZE) - 1); +} +#else +#define seg2vsid(ea) VSID121(ea) +#endif + +/* Primary and secondary PTE hash functions */ + +/* Compute the primary hash from a VSID and a PI */ +#define PTE_HASH1(vsid, pi) (((vsid)^(pi))&((1<<LD_HASH_SIZE)-1)) + +/* Compute the secondary hash from a primary hash */ +#define PTE_HASH2(hash1) ((~(hash1))&((1<<LD_HASH_SIZE)-1)) + +/* Extract the abbreviated page index (which is the + * part of the PI which does not go into the hash + * under all circumstances [10 bits to -> 6bit API]) + */ +#define API(pi) ((pi)>>((LD_MIN_PT_SIZE)-(LD_PTEG_SIZE))) + + +/* Horrible Macros */ +#ifdef __rtems__ +/* must not use printf until multitasking is up */ +typedef int (*PrintF) (const char *, ...); +static PrintF +whatPrintf (void) +{ + return _Thread_Executing ? printf : printk; +} + +#define PRINTF(args...) ((void)(whatPrintf())(args)) +#else +#define PRINTF(args...) 
printf(args) +#endif + +#ifdef DEBUG +static unsigned long triv121PgTblConsistency( + Triv121PgTbl pt, int pass, int expect); + +static int consistencyPass = 0; +#define CONSCHECK(expect) triv121PgTblConsistency(&pgTbl,consistencyPass++,(expect)) +#else +#define CONSCHECK(expect) do {} while (0) +#endif + +/**************************** TYPE DEFINITIONS ****************************/ + +/* internal description of a trivial page table */ +typedef struct Triv121PgTblRec_ +{ + APte base; + unsigned long size; + int active; +} Triv121PgTblRec; + + +/************************** FORWARD DECLARATIONS *************************/ + +#ifdef DEBUG_EXC +static void myhdl (BSP_Exception_frame * excPtr); +#endif + +static void dumpPte (APte pte); + +#ifdef DEBUG +static void +dumpPteg (unsigned long vsid, unsigned long pi, unsigned long hash); +#endif + +unsigned long +triv121IsRangeMapped (long vsid, unsigned long start, unsigned long end); + +static void do_dssall (void); + +/**************************** STATIC VARIABLES ****************************/ + +/* dont malloc - we might have to use this before + * we have malloc or even RTEMS workspace available + */ +static Triv121PgTblRec pgTbl = { 0 }; + +#ifdef DEBUG_EXC +static void *ohdl; /* keep a pointer to the original handler */ +#endif + +/*********************** INLINES & PRIVATE ROUTINES ***********************/ + +/* compute the page table entry group (PTEG) of a hash */ +static inline APte +ptegOf (Triv121PgTbl pt, unsigned long hash) +{ + hash &= ((1 << LD_HASH_SIZE) - 1); + return (APte) (((unsigned long) pt-> + base) | ((hash << LD_PTEG_SIZE) & (pt->size - 1))); +} + +/* see if a vsid/pi combination is already mapped + * + * RETURNS: PTE of mapping / NULL if none exists + * + * NOTE: a vsid<0 is legal and will tell this + * routine that 'pi' is actually an EA to + * be split into vsid and pi... 
+ */ +static APte +alreadyMapped (Triv121PgTbl pt, long vsid, unsigned long pi) +{ + int i; + unsigned long hash, api; + APte pte; + + if (!pt->size) + return 0; + + if (TRIV121_121_VSID == vsid) { + vsid = VSID121 (pi); + pi = PI121 (pi); + } else if (TRIV121_SEG_VSID == vsid) { + vsid = seg2vsid (pi); + pi = PI121 (pi); + } + + hash = PTE_HASH1 (vsid, pi); + api = API (pi); + for (i = 0, pte = ptegOf (pt, hash); i < PTE_PER_PTEG; i++, pte++) + if (pte->v && pte->vsid == vsid && pte->api == api && 0 == pte->h) + return pte; + /* try the secondary hash table */ + hash = PTE_HASH2 (hash); + for (i = 0, pte = ptegOf (pt, hash); i < PTE_PER_PTEG; i++, pte++) + if (pte->v && pte->vsid == vsid && pte->api == api && 1 == pte->h) + return pte; + return 0; +} + +/* find the first available slot for vsid/pi + * + * NOTE: it is NOT legal to pass a vsid<0 / EA combination. + * + * RETURNS free slot with the 'marked' field set. The 'h' + * field is set to 0 or one, depending on whether + * the slot was allocated by using the primary or + * the secondary hash, respectively. + */ +static APte +slotFor (Triv121PgTbl pt, unsigned long vsid, unsigned long pi) +{ + int i; + unsigned long hash; + APte pte; + + /* primary hash */ + hash = PTE_HASH1 (vsid, pi); + /* linear search thru all buckets for this hash */ + for (i = 0, pte = ptegOf (pt, hash); i < PTE_PER_PTEG; i++, pte++) { + if (!pte->v && !pte->marked) { + /* found a free PTE; mark it as potentially used and return */ + pte->h = 0; /* found by the primary hash fn */ + pte->marked = 1; + return pte; + } + } + +#ifdef DEBUG + /* Strange: if the hash table was allocated big enough, + * this should not happen (when using a 1:1 mapping) + * Give them some information... 
+ */ + PRINTF ("## First hash bucket full - "); + dumpPteg (vsid, pi, hash); +#endif + + hash = PTE_HASH2 (hash); +#ifdef DEBUG + PRINTF (" Secondary pteg is 0x%08x\n", (unsigned) ptegOf (pt, hash)); +#endif + for (i = 0, pte = ptegOf (pt, hash); i < PTE_PER_PTEG; i++, pte++) { + if (!pte->v && !pte->marked) { + /* mark this pte as potentially used */ + pte->marked = 1; + pte->h = 1; + return pte; + } + } +#ifdef DEBUG + /* Even more strange - most likely, something is REALLY messed up */ + PRINTF ("## Second hash bucket full - "); + dumpPteg (vsid, pi, hash); +#endif + return 0; +} + +/* unmark all entries */ +static void +unmarkAll (Triv121PgTbl pt) +{ + unsigned long n = pt->size / sizeof (PTERec); + unsigned long i; + APte pte; + for (i = 0, pte = pt->base; i < n; i++, pte++) + pte->marked = 0; + +} + +/* calculate the minimal size of a page/hash table + * to map a range of 'size' bytes in EA space. + * + * RETURNS: size in 'number of bits', i.e. the + * integer part of LOGbase2(minsize) + * is returned. + * NOTE: G3/G4 machines need at least 16 bits + * (64k). + */ +unsigned long +triv121PgTblLdMinSize (unsigned long size) +{ + unsigned long i; + /* round 'size' up to the next page boundary */ + size += (1 << LD_PG_SIZE) - 1; + size &= ~((1 << LD_PG_SIZE) - 1); + /* divide by number of PTEs and multiply + * by the size of a PTE. + */ + size >>= LD_PG_SIZE - LD_PTE_SIZE; + /* find the next power of 2 >= size */ + for (i = 0; i < LD_PHYS_SIZE; i++) { + if ((1 << i) >= size) + break; + } + /* pop up to the allowed minimum, if necessary */ + if (i < LD_MIN_PT_SIZE) + i = LD_MIN_PT_SIZE; + return i; +} + +/* initialize a trivial page table of 2^ldSize bytes + * at 'base' in memory. + * + * RETURNS: OPAQUE HANDLE (not the hash table address) + * or NULL on failure. 
+ */ +Triv121PgTbl +triv121PgTblInit (unsigned long base, unsigned ldSize) +{ + if (pgTbl.size) { + /* already initialized */ + return 0; + } + + if (ldSize < LD_MIN_PT_SIZE) + return 0; /* too small */ + + if (base & ((1 << ldSize) - 1)) + return 0; /* misaligned */ + + /* This was tested on 604r, 750 and 7400. + * On other CPUs, verify that the TLB invalidation works + * for a new CPU variant and that it has hardware PTE lookup/ + * TLB replacement before adding it to this list. + * + * NOTE: The 603 features no hardware PTE lookup - and + * hence the page tables should NOT be used. + * Although lookup could be implemented in + * software this is probably not desirable + * as it could have an impact on hard realtime + * performance, screwing deterministic latency! + * (Could still be useful for debugging, though) + */ + if ( ! ppc_cpu_has_hw_ptbl_lkup() ) + return 0; /* unsupported by this CPU */ + + pgTbl.base = (APte) base; + pgTbl.size = 1 << ldSize; + /* clear all page table entries */ + memset (pgTbl.base, 0, pgTbl.size); + + CONSCHECK (0); + + /* map the page table itself 'm' and 'readonly' */ + if (triv121PgTblMap (&pgTbl, + TRIV121_121_VSID, + base, + (pgTbl.size >> LD_PG_SIZE), + TRIV121_ATTR_M, TRIV121_PP_RO_PAGE) >= 0) + return 0; + + CONSCHECK ((pgTbl.size >> LD_PG_SIZE)); + + return &pgTbl; +} + +/* return the handle of the (one and only) page table + * or NULL if none has been initialized yet. + */ +Triv121PgTbl +triv121PgTblGet (void) +{ + return pgTbl.size ? 
&pgTbl : 0; +} + +/* NOTE: this routine returns -1 on success; + * on failure, the page table index for + * which no PTE could be allocated is returned + * + * (Consult header about argument/return value + * description) + */ +long +triv121PgTblMap (Triv121PgTbl pt, + long ovsid, + unsigned long start, + unsigned long numPages, + unsigned attributes, unsigned protection) +{ + int i, pass; + unsigned long pi; + APte pte; + long vsid; +#ifdef DEBUG + long saved_vsid = ovsid; +#endif + + if (TRIV121_121_VSID == ovsid) { + /* use 1:1 mapping */ + ovsid = VSID121 (start); + } else if (TRIV121_SEG_VSID == ovsid) { + ovsid = seg2vsid (start); + } + +#ifdef DEBUG + PRINTF ("Mapping %i (0x%x) pages at 0x%08x for VSID 0x%08x\n", + (unsigned) numPages, (unsigned) numPages, + (unsigned) start, (unsigned) ovsid); +#endif + + /* map in two passes. During the first pass, we try + * to claim entries as needed. The 'slotFor()' routine + * will 'mark' the claimed entries without 'valid'ating + * them. + * If the mapping fails, all claimed entries are unmarked + * and we return the PI for which allocation failed. + * + * Once we know that the allocation would succeed, we + * do a second pass; during the second pass, the PTE + * is actually written. 
+ * + */ + for (pass = 0; pass < 2; pass++) { + /* check if we would succeed during the first pass */ + for (i = 0, pi = PI121 (start), vsid = ovsid; i < numPages; i++, pi++) { + if (pi >= 1 << LD_PI_SIZE) { + vsid++; + pi = 0; + } + /* leave alone existing mappings for this EA */ + if (!alreadyMapped (pt, vsid, pi)) { + if (!(pte = slotFor (pt, vsid, pi))) { + /* no free slot found for page index 'pi' */ + unmarkAll (pt); + return pi; + } else { + /* have a free slot; marked by slotFor() */ + if (pass) { + /* second pass; do the real work */ + pte->vsid = vsid; + /* H was set by slotFor() */ + pte->api = API (pi); + /* set up 1:1 mapping */ + pte->rpn = + ((((unsigned long) vsid) & + ((1 << (LD_PHYS_SIZE - LD_SEG_SIZE)) - + 1)) << LD_PI_SIZE) | pi; + pte->wimg = attributes & 0xf; + pte->pp = protection & 0x3; + /* mark it valid */ + pte->marked = 0; + if (pt->active) { + uint32_t flags; + rtems_interrupt_disable (flags); + /* order setting 'v' after writing everything else */ + __asm__ volatile ("eieio":::"memory"); + pte->v = 1; + __asm__ volatile ("sync":::"memory"); + rtems_interrupt_enable (flags); + } else { + pte->v = 1; + } + +#ifdef DEBUG + /* add paranoia */ + assert (alreadyMapped (pt, vsid, pi) == pte); +#endif + } + } + } + } + unmarkAll (pt); + } +#ifdef DEBUG + { + unsigned long failedat; + CONSCHECK (-1); + /* double check that the requested range is mapped */ + failedat = + triv121IsRangeMapped (saved_vsid, start, + start + (1 << LD_PG_SIZE) * numPages); + if (0x0C0C != failedat) { + PRINTF ("triv121 mapping failed at 0x%08x\n", (unsigned) failedat); + return PI121 (failedat); + } + } +#endif + return TRIV121_MAP_SUCCESS; /* -1 !! 
*/ +} + +unsigned long +triv121PgTblSDR1 (Triv121PgTbl pt) +{ + return (((unsigned long) pt->base) & ~((1 << LD_MIN_PT_SIZE) - 1)) | + (((pt->size - 1) >> LD_MIN_PT_SIZE) & + ((1 << (LD_HASH_SIZE - (LD_MIN_PT_SIZE - LD_PTEG_SIZE))) - 1) + ); +} + +void +triv121PgTblActivate (Triv121PgTbl pt) +{ +#ifndef DEBUG_MAIN + unsigned long sdr1 = triv121PgTblSDR1 (pt); + register unsigned long tmp0 = 16; /* initial counter value (#segment regs) */ + register unsigned long tmp1 = (KEY_USR | KEY_SUP); + register unsigned long tmp2 = (MSR_EE | MSR_IR | MSR_DR); +#endif + pt->active = 1; + +#ifndef DEBUG_MAIN +#ifdef DEBUG_EXC + /* install our exception handler */ + ohdl = globalExceptHdl; + globalExceptHdl = myhdl; + __asm__ __volatile__ ("sync"::"memory"); +#endif + + /* This section of assembly code takes care of the + * following: + * - get MSR and switch interrupts + MMU off + * + * - load up the segment registers with a + * 1:1 effective <-> virtual mapping; + * give user & supervisor keys + * + * - flush all TLBs; + * NOTE: the TLB flushing code is probably + * CPU dependent! + * + * - setup SDR1 + * + * - restore original MSR + */ + __asm__ __volatile ( + " mtctr %[tmp0]\n" + /* Get MSR and switch interrupts off - just in case. + * Also switch the MMU off; the book + * says that SDR1 must not be changed with either + * MSR_IR or MSR_DR set. I would guess that it could + * be safe as long as the IBAT & DBAT mappings override + * the page table... + */ + " mfmsr %[tmp0]\n" + " andc %[tmp2], %[tmp0], %[tmp2]\n" + " mtmsr %[tmp2]\n" + " isync \n" + /* set up the segment registers */ + " li %[tmp2], 0\n" + "1: mtsrin %[tmp1], %[tmp2]\n" + " addis %[tmp2], %[tmp2], 0x1000\n" /* address next SR */ + " addi %[tmp1], %[tmp1], 1\n" /* increment VSID */ + " bdnz 1b\n" + /* Now flush all TLBs, starting with the topmost index */ + " lis %[tmp2], %[ea_range]@h\n" + "2: addic. 
%[tmp2], %[tmp2], -%[pg_sz]\n" /* address the next one (decrementing) */ + " tlbie %[tmp2]\n" /* invalidate & repeat */ + " bgt 2b\n" + " eieio \n" + " tlbsync \n" + " sync \n" + /* set up SDR1 */ + " mtspr %[sdr1], %[sdr1val]\n" + /* restore original MSR */ + " mtmsr %[tmp0]\n" + " isync \n" + :[tmp0]"+r&"(tmp0), [tmp1]"+b&"(tmp1), [tmp2]"+b&"(tmp2) + :[ea_range]"i"(FLUSH_EA_RANGE), [pg_sz]"i" (1 << LD_PG_SIZE), + [sdr1]"i"(SDR1), [sdr1val]"r" (sdr1) + :"ctr", "cc", "memory" + ); + + /* At this point, BAT0 is probably still active; it's the + * caller's job to deactivate it... + */ +#endif +} + +/************************** DEBUGGING ROUTINES *************************/ + +/* Exception handler to catch page faults */ +#ifdef DEBUG_EXC + +#define BAT_VALID_BOTH 3 /* allow user + super access */ + +static void +myhdl (BSP_Exception_frame * excPtr) +{ + if (3 == excPtr->_EXC_number) { + unsigned long dsisr; + + /* reactivate DBAT0 and read DSISR */ + __asm__ __volatile__ ( + "mfspr %0, %1 \n" + "ori %0, %0, 3\n" + "mtspr %1, %0 \n" + "sync\n" + "mfspr %0, %2\n" + :"=&r" (dsisr) + :"i" (DBAT0U), "i" (DSISR), "i" (BAT_VALID_BOTH) + ); + + printk ("Data Access Exception (DSI) # 3\n"); + printk ("Reactivated DBAT0 mapping\n"); + + + printk ("DSISR 0x%08x\n", dsisr); + + printk ("revectoring to prevent default handler panic().\n"); + printk ("NOTE: exception number %i below is BOGUS\n", ASM_DEC_VECTOR); + /* make this exception 'recoverable' for + * the default handler by faking a decrementer + * exception. + * Note that the default handler's message will be + * wrong about the exception number. + */ + excPtr->_EXC_number = ASM_DEC_VECTOR; + } +/* now call the original handler */ + ((void (*)()) ohdl) (excPtr); +} +#endif + + + +#ifdef DEBUG +/* test the consistency of the page table + * + * 'pass' is merely a number which will be printed + * by this routine, so the caller may give some + * context information. 
#ifdef DEBUG
/* Test the consistency of the page table.
 *
 * 'pass' is merely a number which will be printed
 * by this routine, so the caller may give some
 * context information.
 *
 * 'expected' is the number of valid (plus 'marked')
 * entries the caller believes the page table should
 * have. This routine complains if its count differs;
 * a negative 'expected' disables that comparison.
 *
 * For every PTE the routine reports
 *  - an error if any of the bits selected by 0xe00 in the
 *    second word (the unused bits) is set or if the entry
 *    is both valid and 'marked';
 *  - a warning if any of the topmost 20 bits of the VSID
 *    is set (non-1:1 VSID).
 * Entries that raise an error are not counted; all others
 * contribute to the valid / 'marked' totals.
 *
 * Reporting is muted once a total of 20 messages has been
 * printed (the counter is static, i.e., shared by all calls).
 *
 * RETURNS: total number of valid plus 'marked' slots.
 */
static unsigned long
triv121PgTblConsistency (Triv121PgTbl pt, int pass, int expected)
{
  APte pte;
  int i;
  unsigned v = 0;
  unsigned m = 0;
  int warn = 0;
  int errs = 0;
  static int maxw = 20;         /* mute after detecting this many errors */

  PRINTF ("Checking page table at 0x%08x (size %u==0x%x)\n",
          (unsigned) pt->base, (unsigned) pt->size, (unsigned) pt->size);

  if (!pt->base || !pt->size) {
    PRINTF ("Uninitialized Page Table!\n");
    return 0;
  }

  /* 10/9/2002: I had machine checks crashing after this loop
   *            terminated when it was running forwards. Maybe
   *            caused by speculative loads from beyond the valid
   *            memory area (since the page hash table sits at the
   *            top of physical memory).
   *            Very bizarre - the other loops in this file
   *            seem to be fine. Maybe there is a compiler bug??
   *            (Adding a never-executed 'while' on the upper bound
   *            inside the forward loop also made it work.)
   *            For the moment, I let the loop run backwards...
   */
  for (i = pt->size / sizeof (PTERec) - 1, pte = pt->base + i; i >= 0;
       i--, pte--) {
    /* The reasons are constant strings; no scratch buffer is needed */
    const char *reason = "";
    int err = 0;
    unsigned long *lp = (unsigned long *) pte;

    if ((*(lp + 1) & 0xe00) || (pte->v && pte->marked)) {
      /* unused bits != 0 or valid && marked */
      reason = "unused bits or v && m";
      err = 1;
    } else {
      /* T.S: any VSID is allowed; a non-1:1 VSID is only a warning */
      if (*lp & (0xfffff0 << 7)) {
        reason = "(warning) non-1:1 VSID found";
        err = 2;
      }
      if (pte->v)
        v++;
      if (pte->marked)
        m++;
    }

    if (err && maxw) {
      PRINTF
        ("Pass %i -- strange PTE at 0x%08x found for page index %i == 0x%08x:\n",
         pass, (unsigned) pte, i, (unsigned) i);
      PRINTF ("Reason: %s\n", reason);
      dumpPte (pte);
      if (err & 2) {
        warn++;
      } else {
        errs++;
      }
      maxw--;
    }
  }

  if (errs) {
    PRINTF ("%i errors %s", errs, warn ? "and " : "");
  }
  if (warn) {
    PRINTF ("%i warnings ", warn);
  }
  if (errs || warn) {
    PRINTF ("found; currently %u entries marked, %u are valid\n", m, v);
  }

  v += m;
  if (maxw && expected >= 0 && (unsigned) expected != v) {
    /* number of occupied slots not what they expected */
    PRINTF ("Wrong # of occupied slots detected during pass");
    PRINTF ("%i; should be %i (0x%x) is %u (0x%x)\n",
            pass, expected, (unsigned) expected, v, v);
    maxw--;
  }
  return v;
}
#endif
TRIV121_SEG_VSID : TRIV121_121_VSID, + ea); + + if (pte) + dumpPte (pte); + return pte; +} + +APte +triv121FindPte (unsigned long vsid, unsigned long pi) +{ + return alreadyMapped (&pgTbl, vsid, pi); +} + +APte +triv121UnmapEa (unsigned long ea) +{ + uint32_t flags; + APte pte; + + if (!pgTbl.active) { + pte = alreadyMapped (&pgTbl, TRIV121_121_VSID, ea); + if (pte) /* alreadyMapped checks for pte->v */ + pte->v = 0; + return pte; + } + + pte = alreadyMapped (&pgTbl, TRIV121_SEG_VSID, ea); + + if (!pte) + return 0; + + rtems_interrupt_disable (flags); + pte->v = 0; + do_dssall (); + __asm__ volatile (" sync \n\t" + " tlbie %0 \n\t" + " eieio \n\t" + " tlbsync \n\t" + " sync \n\t"::"r" (ea):"memory"); + rtems_interrupt_enable (flags); + return pte; +} + +/* A context synchronizing jump */ +#define SYNC_LONGJMP(msr) \ + asm volatile( \ + " mtsrr1 %0 \n\t" \ + " bl 1f \n\t" \ + "1: mflr 3 \n\t" \ + " addi 3,3,1f-1b \n\t" \ + " mtsrr0 3 \n\t" \ + " rfi \n\t" \ + "1: \n\t" \ + : \ + :"r"(msr) \ + :"3","lr","memory") + +/* The book doesn't mention dssall when changing PTEs + * but they require it for BAT changes and I guess + * it makes sense in the case of PTEs as well. + * Just do it to be on the safe side... + */ +static void +do_dssall (void) +{ + /* Before changing BATs, 'dssall' must be issued. + * We check MSR for MSR_VE and issue a 'dssall' if + * MSR_VE is set hoping that + * a) on non-altivec CPUs MSR_VE reads as zero + * b) all altivec CPUs use the same bit + * + * NOTE: psim doesn't implement dssall so we skip if we run on psim + */ + if ( (_read_MSR () & MSR_VE) && PPC_PSIM != get_ppc_cpu_type() ) { + /* this construct is needed because we don't know + * if this file is compiled with -maltivec. + * (I plan to add altivec support outside of + * RTEMS core and hence I'd rather not + * rely on consistent compiler flags). 
+ */ +#define DSSALL 0x7e00066c /* dssall opcode */ + __asm__ volatile (" .long %0"::"i" (DSSALL)); +#undef DSSALL + } +} + +APte +triv121ChangeEaAttributes (unsigned long ea, int wimg, int pp) +{ + APte pte; + unsigned long msr; + + if (!pgTbl.active) { + pte = alreadyMapped (&pgTbl, TRIV121_121_VSID, ea); + if (!pte) + return 0; + if (wimg > 0) + pte->wimg = wimg; + if (pp > 0) + pte->pp = pp; + return pte; + } + + pte = alreadyMapped (&pgTbl, TRIV121_SEG_VSID, ea); + + if (!pte) + return 0; + + if (wimg < 0 && pp < 0) + return pte; + + __asm__ volatile ("mfmsr %0":"=r" (msr)); + + /* switch MMU and IRQs off */ + SYNC_LONGJMP (msr & ~(MSR_EE | MSR_DR | MSR_IR)); + + pte->v = 0; + do_dssall (); + __asm__ volatile ("sync":::"memory"); + if (wimg >= 0) + pte->wimg = wimg; + if (pp >= 0) + pte->pp = pp; + __asm__ volatile ("tlbie %0; eieio"::"r" (ea):"memory"); + pte->v = 1; + __asm__ volatile ("tlbsync; sync":::"memory"); + + /* restore, i.e., switch MMU and IRQs back on */ + SYNC_LONGJMP (msr); + + return pte; +} + +static void +pgtblChangePP (Triv121PgTbl pt, int pp) +{ + unsigned long n = pt->size >> LD_PG_SIZE; + unsigned long b, i; + + for (i = 0, b = (unsigned long) pt->base; i < n; + i++, b += (1 << LD_PG_SIZE)) { + triv121ChangeEaAttributes (b, -1, pp); + } +} + +void +triv121MakePgTblRW () +{ + pgtblChangePP (&pgTbl, TRIV121_PP_RW_PAGE); +} + +void +triv121MakePgTblRO () +{ + pgtblChangePP (&pgTbl, TRIV121_PP_RO_PAGE); +} + +long +triv121DumpPte (APte pte) +{ + if (pte) + dumpPte (pte); + return 0; +} + + +#ifdef DEBUG +/* Dump an entire PTEG */ + +static void +dumpPteg (unsigned long vsid, unsigned long pi, unsigned long hash) +{ + APte pte = ptegOf (&pgTbl, hash); + int i; + PRINTF ("hash 0x%08x, pteg 0x%08x (vsid 0x%08x, pi 0x%08x)\n", + (unsigned) hash, (unsigned) pte, (unsigned) vsid, (unsigned) pi); + for (i = 0; i < PTE_PER_PTEG; i++, pte++) { + PRINTF ("pte 0x%08x is 0x%08x : 0x%08x\n", + (unsigned) pte, + (unsigned) *(unsigned long *) pte, + 
(unsigned) *(((unsigned long *) pte) + 1)); + } +} +#endif + +/* Verify that a range of addresses is mapped the page table. + * start/end are segment offsets or EAs (if vsid has one of + * the special values), respectively. + * + * RETURNS: address of the first page for which no + * PTE was found (i.e. page index * page size) + * + * ON SUCCESS, the special value 0x0C0C ("OKOK") + * [which is not page aligned and hence is not + * a valid page address]. + */ + +unsigned long +triv121IsRangeMapped (long vsid, unsigned long start, unsigned long end) +{ +unsigned pi; + + start &= ~((1 << LD_PG_SIZE) - 1); + while (start < end) { + if ( TRIV121_SEG_VSID != vsid && TRIV121_121_VSID != vsid ) + pi = PI121(start); + else + pi = start; + if (!alreadyMapped (&pgTbl, vsid, pi)) + return start; + start += 1 << LD_PG_SIZE; + } + return 0x0C0C; /* OKOK - not on a page boundary */ +} + + +#include <stdlib.h> + +/* print a PTE */ +static void +dumpPte (APte pte) +{ + if (0 == ((unsigned long) pte & ((1 << LD_PTEG_SIZE) - 1))) + PRINTF ("PTEG--"); + else + PRINTF ("......"); + if (pte->v) { + PRINTF ("VSID: 0x%08x H:%1i API: 0x%02x\n", pte->vsid, pte->h, pte->api); + PRINTF (" "); + PRINTF ("RPN: 0x%08x WIMG: 0x%1x, (m %1i), pp: 0x%1x\n", + pte->rpn, pte->wimg, pte->marked, pte->pp); + } else { + PRINTF ("xxxxxx\n"); + PRINTF (" "); + PRINTF ("xxxxxx\n"); + } +} + + +#if defined(DEBUG_MAIN) +/* dump page table entries from index 'from' to 'to' + * The special values (unsigned)-1 are allowed which + * cause the routine to dump the entire table. 
#if defined(DEBUG_MAIN)
/* Dump page table entries from index 'from' to 'to' ('to' is
 * exclusive). An index beyond the end of the table - in
 * particular the special value (unsigned)-1 - is clamped
 * ('from' to 0, 'to' to the number of entries), so passing
 * (unsigned)-1 for both dumps the entire table.
 *
 * RETURNS 0
 */
int
triv121PgTblDump (Triv121PgTbl pt, unsigned from, unsigned to)
{
  int i;
  APte pte;

  PRINTF ("Dumping PT [size 0x%08x == %u] at 0x%08x\n",
          (unsigned) pt->size, (unsigned) pt->size, (unsigned) pt->base);
  if (from > pt->size >> LD_PTE_SIZE)
    from = 0;
  if (to > pt->size >> LD_PTE_SIZE)
    to = (pt->size >> LD_PTE_SIZE);
  for (i = from, pte = pt->base + from; i < (long) to; i++, pte++) {
    dumpPte (pte);
  }
  return 0;
}



#define LD_DBG_PT_SIZE LD_MIN_PT_SIZE

/* Print 'prompt' and read one number from stdin into '*value'.
 * Decimal, octal (leading 0) and hex (leading 0x) are accepted;
 * the prompt is repeated until a well-formed number is entered.
 *
 * RETURNS: 1 on success, 0 on end-of-file or read error.
 */
static int
promptUnsignedLong (const char *prompt, unsigned long *value)
{
  char token[64];
  char *end;

  for (;;) {
    PRINTF ("%s", prompt);
    fflush (stdout);
    if (1 != scanf ("%63s", token))
      return 0;                 /* EOF / error: give up instead of spinning */
    *value = strtoul (token, &end, 0);
    if (end != token && '\0' == *end)
      return 1;
    /* not a number; the token has been consumed, ask again */
  }
}

/* Interactive test driver: sets up a page table of minimal size
 * in malloc()ed memory and keeps mapping user supplied ranges
 * (dumping the table after each one) until a mapping fails or
 * the input ends.
 */
int
main (int argc, char **argv)
{
  unsigned long base, start, numPages;
  unsigned long size = 1UL << LD_DBG_PT_SIZE;
  void *mem;
  Triv121PgTbl pt;

  (void) argc;
  (void) argv;

  /* twice the size so that an aligned table fits in any case */
  mem = malloc (size << 1);
  if (!mem) {
    PRINTF ("Unable to allocate memory for the page table\n");
    return 1;
  }

  /* align pt */
  base = (unsigned long) mem;
  base += size - 1;
  base &= ~(size - 1);

  /* must not live inside assert(): it would vanish with NDEBUG */
  pt = triv121PgTblInit (base, LD_DBG_PT_SIZE);
  if (!pt) {
    PRINTF ("Unable to initialize the page table\n");
    free (mem);
    return 1;
  }

  triv121PgTblDump (pt, (unsigned) -1, (unsigned) -1);
  do {
    if (!promptUnsignedLong ("Start Address:", &start)
        || !promptUnsignedLong ("# pages:", &numPages))
      break;
  } while (TRIV121_MAP_SUCCESS ==
           triv121PgTblMap (pt, TRIV121_121_VSID, start, numPages,
                            TRIV121_ATTR_IO_PAGE, 2)
           && 0 == triv121PgTblDump (pt, (unsigned) -1, (unsigned) -1));

  free (mem);
  return 0;
}
#endif