author    Sebastian Huber <sebastian.huber@embedded-brains.de>  2018-04-24 07:06:36 +0200
committer Sebastian Huber <sebastian.huber@embedded-brains.de>  2018-04-24 10:24:18 +0200
commit    03e1d8378ecee81bd6ac40b41145c36dfd8752a4 (patch)
tree      bae7c12cdecdbb64a969c9db98526cb5ed47a512 /bsps/powerpc/motorola_powerpc
parent    bsps/arm: Remove unused shared/comm/uart.c (diff)
download  rtems-03e1d8378ecee81bd6ac40b41145c36dfd8752a4.tar.bz2
bsps/powerpc: Move bootloader to bsps
This bootloader is only used by the motorola_powerpc BSP. This patch is a part of the BSP source reorganization. Update #3285.
Diffstat (limited to 'bsps/powerpc/motorola_powerpc')
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/README        |   45
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/bootldr.h     |  268
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/em86.c        |  574
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/em86real.S    | 4553
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/exception.S   |  471
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/head.S        |  466
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/lib.c         |   62
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/misc.c        |  545
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/mm.c          |  996
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/pci.c         | 1374
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/pci.h         |   95
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/ppcboot.lds   |   96
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/qemu_fakeres.c |  226
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/qemu_fakerom.S |  217
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/zlib.c        | 2102
-rw-r--r--  bsps/powerpc/motorola_powerpc/bootloader/zlib.h        |  434
-rw-r--r--  bsps/powerpc/motorola_powerpc/console/console.inl (renamed from bsps/powerpc/motorola_powerpc/dev/console.inl) | 0
-rw-r--r--  bsps/powerpc/motorola_powerpc/console/keyboard.h (renamed from bsps/powerpc/motorola_powerpc/dev/keyboard.h) | 0
-rw-r--r--  bsps/powerpc/motorola_powerpc/console/polled_io.c (renamed from bsps/powerpc/motorola_powerpc/dev/polled_io.c) | 0
19 files changed, 12524 insertions, 0 deletions
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/README b/bsps/powerpc/motorola_powerpc/bootloader/README
new file mode 100644
index 0000000000..23af0c9a9e
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/README
@@ -0,0 +1,45 @@
+The code in this directory has been taken WITH PERMISSION from
+Gabriel Paubert, paubert@iram.es. The main reason for having
+a separate bootloader for PReP compliant firmware is that the
+initial code is relocated by the firmware to an unknown address
+(actually 0x5000 on the Motorola MCP750), and, like Gabriel, I
+think having relocatable bootloader code is a must.
+
+So the way of building a binary executable that can be booted via
+hard disk or network boot goes like this:
+
+	- make an RTEMS executable,
+	- put it as a data section in the bootloader binary,
+	- relink the loader (see make-exe macros for details).
+
+I would like to thank Gabriel for his support and his code.
+The original code can be found in the form of a patch to the official
+Linux kernel at (I insist: not the vger ppc kernel or iMac ppc kernels!!):
+
+<ftp://vlab1.iram.es/pub/linux-2.2/>
+
+After applying the patch, the code is located in a new directory
+called prepboot.
+
+(NB: use ftp, not Netscape...)
+
+Note that the actual code differs a lot since Gabriel chose to use
+a CHRP compliant mapping instead of a PReP mapping to save
+BATs. I had no time to upgrade the code to his new version, although
+I agree it should be done...
+
+I have also split the original code to get a more modular
+design, enabling code reuse between the loader and the RTEMS
+initialization (e.g. printk, ...).
+
+Eric Valette (valette@crf.canon.fr)
+
+
+**************************************************
+2003/5/7, Greg Menke, gregory.menke@gsfc.nasa.gov
+
+Reworked the PCI bus 0 initialization a little and added support for
+configuring an arbitrary number of other busses & their respective
+bridges. Also added support for configuring I/O ranges below 0x10000,
+which I think is reasonable given this is a PowerPC BSP.
+
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/bootldr.h b/bsps/powerpc/motorola_powerpc/bootloader/bootldr.h
new file mode 100644
index 0000000000..c2e95d53a5
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/bootldr.h
@@ -0,0 +1,268 @@
+/*
+ * bootldr.h -- Include file for bootloader.
+ */
+
+/*
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+#ifndef _PPC_BOOTLDR_H
+#define _PPC_BOOTLDR_H
+
+#ifndef ASM
+#include <stdint.h>
+#include <bsp/residual.h>
+#include <bsp/consoleIo.h>
+#include "pci.h"
+
+#define abs __builtin_abs
+
+#define PTE_REFD 0x100
+#define PTE_CHNG (0x80|PTE_REFD) /* Modified implies referenced */
+#define PTE_WTHR 0x040
+#define PTE_CINH 0x020
+#define PTE_COHER 0x010
+#define PTE_GUAR 0x008
+#define PTE_RO 0x003
+#define PTE_RW 0x002
+
+#define PTE_RAM (PTE_CHNG|PTE_COHER|PTE_RW)
+#define PTE_ROM (PTE_REFD|PTE_RO)
+#define PTE_IO (PTE_CHNG|PTE_CINH|PTE_GUAR|PTE_RW)
+
+typedef struct {}opaque;
+
+/* The context passed during MMU interrupts. */
+typedef struct _ctxt {
+ u_long lr, ctr;
+ u_int cr, xer;
+ u_long nip, msr;
+ u_long regs[32];
+} ctxt;
+
+/* The main structure which is pointed to permanently by r13. Things
+ * are not separated very well between parts because it would cause
+ * too much code bloat for such a simple program as the bootloader.
+ * The code is designed to be compiled with the -m relocatable option and
+ * tries to minimize the number of relocations/fixups and the number of
+ * functions that have to access the .got2 sections (this increases the
+ * size of the prologue in every function).
+ */
+typedef struct _boot_data {
+ RESIDUAL *residual;
+ void *load_address;
+ void *of_entry;
+ void *r6, *r7, *r8, *r9, *r10;
+ u_long cache_lsize;
+ void *image; /* Where to copy ourselves */
+ void *stack;
+ void *mover; /* where to copy codemove to avoid overlays */
+ u_long o_msr, o_hid0, o_r31;
+ opaque * mm_private;
+ const struct pci_bootloader_config_access_functions* pci_functions;
+ opaque * pci_private;
+ struct pci_dev * pci_devices;
+ opaque * v86_private;
+ char cmd_line[256];
+} boot_data;
+
+register boot_data *bd __asm__("r13");
+
+static inline int
+pcibios_read_config_byte(u_char bus, u_char dev_fn,
+ u_char where, uint8_t *val) {
+ return bd->pci_functions->read_config_byte(bus, dev_fn, where, val);
+}
+
+static inline int
+pcibios_read_config_word(u_char bus, u_char dev_fn,
+ u_char where, uint16_t *val) {
+ return bd->pci_functions->read_config_word(bus, dev_fn, where, val);
+}
+
+static inline int
+pcibios_read_config_dword(u_char bus, u_char dev_fn,
+ u_char where, uint32_t *val) {
+ return bd->pci_functions->read_config_dword(bus, dev_fn, where, val);
+}
+
+static inline int
+pcibios_write_config_byte(u_char bus, u_char dev_fn,
+ u_char where, uint8_t val) {
+ return bd->pci_functions->write_config_byte(bus, dev_fn, where, val);
+}
+
+static inline int
+pcibios_write_config_word(u_char bus, u_char dev_fn,
+ u_char where, uint16_t val) {
+ return bd->pci_functions->write_config_word(bus, dev_fn, where, val);
+}
+
+static inline int
+pcibios_write_config_dword(u_char bus, u_char dev_fn,
+ u_char where, uint32_t val) {
+ return bd->pci_functions->write_config_dword(bus, dev_fn, where, val);
+}
+
+static inline int
+pci_bootloader_read_config_byte(struct pci_dev *dev, u_char where, uint8_t *val) {
+ return bd->pci_functions->read_config_byte(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+static inline int
+pci_bootloader_read_config_word(struct pci_dev *dev, u_char where, uint16_t *val) {
+ return bd->pci_functions->read_config_word(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+static inline int
+pci_bootloader_read_config_dword(struct pci_dev *dev, u_char where, uint32_t *val) {
+ return bd->pci_functions->read_config_dword(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+static inline int
+pci_bootloader_write_config_byte(struct pci_dev *dev, u_char where, uint8_t val) {
+ return bd->pci_functions->write_config_byte(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+static inline int
+pci_bootloader_write_config_word(struct pci_dev *dev, u_char where, uint16_t val) {
+ return bd->pci_functions->write_config_word(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+static inline int
+pci_bootloader_write_config_dword(struct pci_dev *dev, u_char where, uint32_t val) {
+ return bd->pci_functions->write_config_dword(dev->bus->number,
+ dev->devfn,
+ where, val);
+}
+
+/* codemove is like memmove, but it also takes the cache line size
+ * as a 4th parameter to synchronize the caches. If this last parameter is
+ * zero, it performs more or less like memmove. No copy is performed if
+ * source and destination addresses are equal. However the caches
+ * are synchronized. Note that the size is always rounded up to the
+ * next multiple of 4.
+ */
+extern void * codemove(void *, const void *, size_t, unsigned long);
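+/* A usage sketch (illustrative, not from the original sources; dst, src
+ * and len are hypothetical variables set up by the caller):
+ *
+ *	codemove(dst, src, len, bd->cache_lsize);  // copy and sync caches
+ *	codemove(dst, dst, len, bd->cache_lsize);  // no copy, sync caches only
+ */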
+
+/* The physical memory allocator allows memory to be aligned to
+ * powers of 2 given by the low order bits of flags.
+ * By default it allocates from higher addresses towards lower ones;
+ * setting PA_LOW reverses this behaviour.
+ */
+
+#define palloc(size) __palloc(size,0)
+
+#define isa_io_base (bd->io_base)
+
+void * __palloc(u_long, int);
+void pfree(void *);
+
+#define PA_LOW 0x100
+#define PA_PERM 0x200 /* Not freeable by pfree */
+#define PA_SUBALLOC 0x400 /* Allocate for suballocation by salloc */
+#define PA_ALIGN_MASK 0x1f
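+
+/* Illustrative use, assuming the PA_ALIGN_MASK bits hold log2 of the
+ * required alignment (an interpretation of the comment above, not a
+ * documented guarantee):
+ *
+ *	void *buf = __palloc(0x10000, PA_LOW | 12);  // 64kB, 4kB aligned, low
+ */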
+
+void * valloc(u_long size);
+void vfree(void *);
+
+int vmap(void *, u_long, u_long);
+void vunmap(void *);
+
+void * salloc(u_long size);
+void sfree(void *);
+
+void pci_init(void);
+
+void * memset(void *p, int c, size_t n);
+
+void gunzip(void *, int, unsigned char *, int *);
+
+void print_all_maps(const char *);
+void print_hash_table(void);
+void MMUon(void);
+void MMUoff(void);
+void hang(const char *, u_long, ctxt *) __attribute__((noreturn));
+
+int init_v86(void);
+void cleanup_v86_mess(void);
+void em86_main(struct pci_dev *);
+int find_max_mem(struct pci_dev *);
+
+/*
+ * Prototypes for calls from assembly and across files.
+ */
+typedef struct _x86 x86;
+
+int em86_trap(x86 *p);
+void decompress_kernel(int kernel_size, void * zimage_start, int len,
+ void * initrd_start, int initrd_len );
+void boot_udelay(uint32_t _microseconds);
+void setup_hw(void);
+void _handler(int vec, ctxt *p);
+int early_setup(u_long image_size);
+void mm_init(u_long image_size);
+#endif
+
+#ifdef ASM
+/* These definitions simplify the ugly declarations necessary for
+ * GOT definitions.
+ */
+
+#define GOT_ENTRY(NAME) .L_ ## NAME = . - .LCTOC1 ; .long NAME
+#define GOT(NAME) .L_ ## NAME (r30)
+
+#define START_GOT \
+ .section ".got2","aw"; \
+.LCTOC1 = .+ 0x8000
+
+#define END_GOT \
+ .text
+
+#define GET_GOT \
+ bl 1f; \
+ .text 2; \
+0: .long .LCTOC1-1f; \
+ .text ; \
+1: mflr r30; \
+ lwz r0,0b-1b(r30); \
+ add r30,r0,r30
+
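+/* Usage sketch, mirroring how em86real.S uses these macros: declare
+ * entries between START_GOT/END_GOT, then load them once GET_GOT has
+ * set up r30 (my_table is a hypothetical symbol):
+ *
+ *	START_GOT
+ *	GOT_ENTRY(my_table)
+ *	END_GOT
+ *	...
+ *	GET_GOT
+ *	lwz	r3,GOT(my_table)
+ */
+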
+#define bd r13
+#define cache_lsize 32 /* Offset into bd area */
+#define image 36
+#define stack 40
+#define mover 44
+#define o_msr 48
+#define o_hid0 52
+#define o_r31 56
+/* Stack offsets for saved registers on exceptions */
+#define save_lr 8(r1)
+#define save_ctr 12(r1)
+#define save_cr 16(r1)
+#define save_xer 20(r1)
+#define save_nip 24(r1)
+#define save_msr 28(r1)
+#define save_r(n) 32+4*n(r1)
+#endif
+
+#endif
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/em86.c b/bsps/powerpc/motorola_powerpc/bootloader/em86.c
new file mode 100644
index 0000000000..5ce4b0cc34
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/em86.c
@@ -0,0 +1,574 @@
+/*
+ * em86.c -- Video BIOS ROM emulation for the bootloader.
+ */
+
+/*
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+/*****************************************************************************
+*
+* Code to interpret Video BIOS ROM routines.
+*
+*
+******************************************************************************/
+
+/* These includes are for the development version only */
+#include <sys/types.h>
+#include "pci.h"
+#include <libcpu/byteorder.h>
+#ifdef __BOOT__
+#include "bootldr.h"
+#include <limits.h>
+#include <rtems/bspIo.h>
+#endif
+
+/* Code options, put them on the compiler command line */
+/* #define EIP_STATS */ /* EIP based profiling */
+/* #undef EIP_STATS */
+
+typedef union _reg_type1 {
+ unsigned e;
+ unsigned short x;
+ struct {
+ unsigned char l, h;
+ } lh;
+} reg_type1;
+
+typedef union _reg_type2 {
+ uint32_t e;
+ uint16_t x;
+} reg_type2;
+
+typedef struct _x86 {
+ reg_type1
+ _eax, _ecx, _edx, _ebx;
+ reg_type2
+ _esp, _ebp, _esi, _edi;
+ unsigned
+ es, cs, ss, ds, fs, gs, eip, eflags;
+ unsigned char
+ *esbase, *csbase, *ssbase, *dsbase, *fsbase, *gsbase;
+ volatile unsigned char *iobase;
+ unsigned char *ioperm;
+ unsigned
+ reason, nexteip, parm1, parm2, opcode, base;
+ unsigned *optable, opreg; /* no longer used! */
+ unsigned char* vbase;
+ unsigned instructions;
+#ifdef __BOOT__
+ u_char * ram;
+ u_char * rom;
+ struct pci_dev * dev;
+#else
+ unsigned filler[14]; /* Skip to next 64 byte boundary */
+ unsigned eipstats[32768][2];
+#endif
+} x86;
+
+x86 v86_private __attribute__((aligned(32)));
+
+/* Emulator is in another source file */
+extern
+void em86_enter(x86 * p);
+
+#define EAX (p->_eax.e)
+#define ECX (p->_ecx.e)
+#define EDX (p->_edx.e)
+#define EBX (p->_ebx.e)
+#define ESP (p->_esp.e)
+#define EBP (p->_ebp.e)
+#define ESI (p->_esi.e)
+#define EDI (p->_edi.e)
+#define AX (p->_eax.x)
+#define CX (p->_ecx.x)
+#define DX (p->_edx.x)
+#define BX (p->_ebx.x)
+#define SP (p->_esp.x)
+#define BP (p->_ebp.x)
+#define SI (p->_esi.x)
+#define DI (p->_edi.x)
+#define AL (p->_eax.lh.l)
+#define CL (p->_ecx.lh.l)
+#define DL (p->_edx.lh.l)
+#define BL (p->_ebx.lh.l)
+#define AH (p->_eax.lh.h)
+#define CH (p->_ecx.lh.h)
+#define DH (p->_edx.lh.h)
+#define BH (p->_ebx.lh.h)
+
+/* Function used to debug */
+#ifdef __BOOT__
+#define printf printk
+#endif
+#ifdef DEBUG
+static void dump86(x86 * p){
+ unsigned char *s = p->csbase + p->eip;
+ printf("cs:eip=%04x:%08x, eax=%08x, ecx=%08x, edx=%08x, ebx=%08x\n",
+ p->cs, p->eip, ld_le32(&EAX),
+ ld_le32(&ECX), ld_le32(&EDX), ld_le32(&EBX));
+ printf("ss:esp=%04x:%08x, ebp=%08x, esi=%08x, edi=%08x, efl=%08x\n",
+ p->ss, ld_le32(&ESP), ld_le32(&EBP),
+ ld_le32(&ESI), ld_le32(&EDI), p->eflags);
+ printf("nip=%08x, ds=%04x, es=%04x, fs=%04x, gs=%04x, total=%d\n",
+ p->nexteip, p->ds, p->es, p->fs, p->gs, p->instructions);
+ printf("code: %02x %02x %02x %02x %02x %02x "
+ "%02x %02x %02x %02x %02x %02x\n",
+ s[0], s[1], s[2], s[3], s[4], s[5],
+ s[6], s[7], s[8], s[9], s[10], s[11]);
+#ifndef __BOOT__
+ printf("op1=%08x, op2=%08x, result=%08x, flags=%08x\n",
+ p->filler[11], p->filler[12], p->filler[13], p->filler[14]);
+#endif
+}
+#else
+#define dump86(x)
+#endif
+
+static int bios86pci(x86 * p) {
+ unsigned reg=ld_le16(&DI);
+ reg_type2 tmp;
+
+ if (AL>=8 && AL<=13 && reg>0xff) {
+ AH = PCIBIOS_BAD_REGISTER_NUMBER;
+ } else {
+ switch(AL) {
+ case 2: /* find_device */
+ /* Should be improved for BIOSes able to handle
+ * multiple devices. We simply assume the BIOS
+ * inits a single device, and return an error
+ * if it tries to find more...
+ */
+ if (SI) {
+ AH=PCIBIOS_DEVICE_NOT_FOUND;
+ } else {
+ BH = p->dev->bus->number;
+ BL = p->dev->devfn;
+ AH = 0;
+ }
+ break;
+ /*
+ case 3: find_class not implemented for now.
+ */
+ case 8: /* read_config_byte */
+ AH=pcibios_read_config_byte(BH, BL, reg, &CL);
+ break;
+ case 9: /* read_config_word */
+ AH=pcibios_read_config_word(BH, BL, reg, &tmp.x);
+ CX=ld_le16(&tmp.x);
+ break;
+ case 10: /* read_config_dword */
+ AH=pcibios_read_config_dword(BH, BL, reg, &tmp.e);
+ ECX=ld_le32(&tmp.e);
+ break;
+ case 11: /* write_config_byte */
+ AH=pcibios_write_config_byte(BH, BL, reg, CL);
+ break;
+ case 12: /* write_config_word */
+ AH=pcibios_write_config_word(BH, BL, reg, ld_le16(&CX));
+ break;
+ case 13: /* write_config_dword */
+ AH=pcibios_write_config_dword(
+ BH, BL, reg, ld_le32((uint32_t *)&ECX));
+ break;
+ default:
+ printf("Unimplemented or illegal PCI service call #%d!\n",
+ AL);
+ return 1;
+ }
+ }
+ p->eip = p->nexteip;
+ /* Set/clear carry according to result */
+ if (AH) p->eflags |= 1; else p->eflags &=~1;
+ return 0;
+}
+
+static void push2(x86 *p, unsigned value) {
+ unsigned char * sbase= p->ssbase;
+ unsigned newsp = (ld_le16(&SP)-2)&0xffff;
+ st_le16(&SP,newsp);
+ st_le16((unsigned short *)(sbase+newsp), value);
+}
+
+static unsigned pop2(x86 *p) {
+ unsigned char * sbase=p->ssbase;
+ unsigned oldsp = ld_le16(&SP);
+ st_le16(&SP,oldsp+2);
+ return ld_le16((unsigned short *)(sbase+oldsp));
+}
+
+static int int10h(x86 * p) { /* Process BIOS video interrupt */
+ unsigned vector;
+ vector=ld_le32((uint32_t *)p->vbase+0x10);
+ if (((vector&0xffff0000)>>16)==0xc000) {
+ push2(p, p->eflags);
+ push2(p, p->cs);
+ push2(p, p->nexteip);
+ p->cs=vector>>16;
+ p->csbase=p->vbase + (p->cs<<4);
+ p->eip=vector&0xffff;
+#if 1
+ p->eflags&=0xfcff; /* Clear AC/TF/IF */
+#else
+ p->eflags = (p->eflags&0xfcff)|0x100; /* Set TF for debugging */
+#endif
+ /* p->eflags|=0x100; uncomment to force a trap */
+ return(0);
+ } else {
+ switch(AH) {
+ case 0x12:
+ switch(BL){
+ case 0x32:
+ p->eip=p->nexteip;
+ return(0);
+ break;
+ default:
+ break;
+ }
+ default:
+ break;
+ }
+ printf("unhandled soft interrupt 0x10: vector=%x\n", vector);
+ return(1);
+ }
+}
+
+static int process_softint(x86 * p) {
+#if 0
+ if (p->parm1!=0x10 || AH!=0x0e) {
+ printf("Soft interrupt\n");
+ dump86(p);
+ }
+#endif
+ switch(p->parm1) {
+ case 0x10: /* BIOS video interrupt */
+ return int10h(p);
+ case 0x1a:
+ if(AH==0xb1) return bios86pci(p);
+ break;
+ default:
+ break;
+ }
+ dump86(p);
+ printf("Unhandled soft interrupt number 0x%04x, AX=0x%04x\n",
+ p->parm1, ld_le16(&AX));
+ return(1);
+}
+
+/* The only function called back by the emulator is em86_trap; all
+ instructions that may change the code segment are trapped here.
+ p->reason is one of the following codes. */
+#define code_zerdiv 0
+#define code_trap 1
+#define code_int3 3
+#define code_into 4
+#define code_bound 5
+#define code_ud 6
+#define code_dna 7
+
+#define code_iretw 256
+#define code_iretl 257
+#define code_lcallw 258
+#define code_lcalll 259
+#define code_ljmpw 260
+#define code_ljmpl 261
+#define code_lretw 262
+#define code_lretl 263
+#define code_softint 264
+#define code_lock 265 /* Lock prefix */
+/* Codes 1024 to 2047 are used for I/O port access instructions:
+ - The three LSB define the port size (1, 2 or 4)
+ - bit of weight 512 means out if set, in if clear
+ - bit of weight 256 means ins/outs if set, in/out if clear
+ - bit of weight 128 means use esi/edi if set, si/di if clear
+ (only used for ins/outs instructions, always clear for in/out)
+ */
+#define code_inb 1024+1
+#define code_inw 1024+2
+#define code_inl 1024+4
+#define code_outb 1024+512+1
+#define code_outw 1024+512+2
+#define code_outl 1024+512+4
+#define code_insb_a16 1024+256+1
+#define code_insw_a16 1024+256+2
+#define code_insl_a16 1024+256+4
+#define code_outsb_a16 1024+512+256+1
+#define code_outsw_a16 1024+512+256+2
+#define code_outsl_a16 1024+512+256+4
+#define code_insb_a32 1024+256+128+1
+#define code_insw_a32 1024+256+128+2
+#define code_insl_a32 1024+256+128+4
+#define code_outsb_a32 1024+512+256+128+1
+#define code_outsw_a32 1024+512+256+128+2
+#define code_outsl_a32 1024+512+256+128+4
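+
+/* A sketch (not from the original source) of decoding such a reason
+ * code, following the bit layout described above:
+ *
+ *	int size = p->reason & 7;                // port size: 1, 2 or 4
+ *	int is_out = (p->reason & 512) != 0;     // out/outs if set
+ *	int is_string = (p->reason & 256) != 0;  // ins/outs if set
+ */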
+
+int em86_trap(x86 *p) {
+#ifndef __BOOT__
+ int i;
+ unsigned char command[80];
+ unsigned char *verb, *t;
+ unsigned short *fp;
+ static unsigned char def=0;
+ static unsigned char * bptaddr=NULL; /* Breakpoint address */
+ static unsigned char bptopc; /* Replaced breakpoint opcode */
+ unsigned char cmd;
+ unsigned tmp;
+#endif
+ switch(p->reason) {
+ case code_int3:
+#ifndef __BOOT__
+ if(p->csbase+p->eip == bptaddr) {
+ *bptaddr=bptopc;
+ bptaddr=NULL;
+ }
+ else printf("Unexpected ");
+#endif
+ printf("Breakpoint Interrupt !\n");
+ /* Note that this fallthrough (no break;) is on purpose */
+#ifdef __BOOT__
+ return 0;
+#else
+ case code_trap:
+ dump86(p);
+ for(;;) {
+ printf("b(reakpoint, g(o, q(uit, s(tack, t(race ? [%c] ", def);
+ fgets(command,sizeof(command),stdin);
+ verb = strtok(command," \n");
+ if(verb) cmd=*verb; else cmd=def;
+ def=0;
+ switch(cmd) {
+ case 'b':
+ case 'B':
+ if(bptaddr) *bptaddr=bptopc;
+ t=strtok(0," \n");
+ i=sscanf(t,"%x",&tmp);
+ if(i==1) {
+ bptaddr=p->vbase + tmp;
+ bptopc=*bptaddr;
+ *bptaddr=0xcc;
+ } else bptaddr=NULL;
+ break;
+ case 'q':
+ case 'Q':
+ return 1;
+ break;
+
+ case 'g':
+ case 'G':
+ p->eflags &= ~0x100;
+ return 0;
+ break;
+
+ case 's':
+ case 'S': /* Print the 8 stack top words */
+ fp = (unsigned short *)(p->ssbase+ld_le16(&SP));
+ printf("Stack [%04x:%04x]: %04x %04x %04x %04x %04x %04x %04x %04x\n",
+ p->ss, ld_le16(&SP),
+ ld_le16(fp+0), ld_le16(fp+1), ld_le16(fp+2), ld_le16(fp+3),
+ ld_le16(fp+4), ld_le16(fp+5), ld_le16(fp+6), ld_le16(fp+7));
+ break;
+ case 't':
+ case 'T':
+ p->eflags |= 0x10100; /* Set the resume and trap flags */
+ def='t';
+ return 0;
+ break;
+ /* Should add some code to edit registers */
+ }
+ }
+#endif
+ break;
+ case code_ud:
+ printf("Attempt to execute an unimplemented"
+ "or undefined opcode!\n");
+ dump86(p);
+ return(1); /* exit interpreter */
+ break;
+ case code_dna:
+ printf("Attempt to execute a floating point instruction!\n");
+ dump86(p);
+ return(1);
+ break;
+ case code_softint:
+ return process_softint(p);
+ break;
+ case code_iretw:
+ p->eip=pop2(p);
+ p->cs=pop2(p);
+ p->csbase=p->vbase + (p->cs<<4);
+ p->eflags= (p->eflags&0xfffe0000)|pop2(p);
+ /* p->eflags|= 0x100; */ /* Uncomment to trap after iretws */
+ return(0);
+ break;
+#ifndef __BOOT__
+ case code_inb:
+ case code_inw:
+ case code_inl:
+ case code_insb_a16:
+ case code_insw_a16:
+ case code_insl_a16:
+ case code_insb_a32:
+ case code_insw_a32:
+ case code_insl_a32:
+ case code_outb:
+ case code_outw:
+ case code_outl:
+ case code_outsb_a16:
+ case code_outsw_a16:
+ case code_outsl_a16:
+ case code_outsb_a32:
+ case code_outsw_a32:
+ case code_outsl_a32:
+ /* For now we simply enable I/O to the ports and continue */
+ for(i=p->parm1; i<p->parm1+(p->reason&7); i++) {
+ p->ioperm[i/8] &= ~(1<<i%8);
+ }
+ printf("Access to ports %04x-%04x enabled.\n",
+ p->parm1, p->parm1+(p->reason&7)-1);
+ return(0);
+#endif
+ case code_lretw:
+ /* Check for the exit eyecatcher */
+ if ( *(u_int *)(p->ssbase+ld_le16(&SP)) == UINT_MAX) return 1;
+ /* No break on purpose */
+ default:
+ dump86(p);
+ printf("em86_trap called with unhandled reason code !\n");
+ return(1);
+
+ }
+}
+
+void cleanup_v86_mess(void) {
+ x86 *p = (x86 *) bd->v86_private;
+
+ /* This automatically removes the mappings ! */
+ vfree(p->vbase);
+ p->vbase = 0;
+ pfree(p->ram);
+ p->ram = 0;
+ sfree(p->ioperm);
+ p->ioperm=0;
+}
+
+int init_v86(void) {
+ x86 *p = (x86 *) bd->v86_private;
+
+ /* p->vbase is non null when the v86 is properly set-up */
+ if (p->vbase) return 0;
+
+ /* Set everything to 0 */
+ memset(p, 0, sizeof(*p));
+ p->ioperm = salloc(65536/8+1);
+ p->ram = palloc(0xa0000);
+ p->iobase = ptr_mem_map->io_base;
+
+ if (!p->ram || !p->ioperm) return 1;
+
+ /* The ioperm array must have an additional byte at the end ! */
+ p->ioperm[65536/8] = 0xff;
+
+ p->vbase = valloc(0x110000);
+ if (!p->vbase) return 1;
+
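+ /* Resulting layout (a reading of the vmap calls below): 0x00000-0x9ffff
+ * maps the allocated RAM copy, 0x100000-0x10ffff mirrors its first 64kB
+ * (real mode wraparound), and 0xa0000-0xbffff maps the real ISA memory. */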
+ /* These calls should never fail. */
+ vmap(p->vbase, (u_long)p->ram|PTE_RAM, 0xa0000);
+ vmap(p->vbase+0x100000, (u_long)p->ram|PTE_RAM, 0x10000);
+ vmap(p->vbase+0xa0000,
+ ((u_long)ptr_mem_map->isa_mem_base+0xa0000)|PTE_IO, 0x20000);
+ return 0;
+}
+
+void em86_main(struct pci_dev *dev){
+ x86 *p = (x86 *) bd->v86_private;
+ u_short signature;
+ u_char length;
+ volatile u_int *src;
+ u_int *dst, left;
+ uint32_t saved_rom;
+#if defined(MONITOR_IO) && !defined(__BOOT__)
+#define IOMASK 0xff
+#else
+#define IOMASK 0
+#endif
+
+#ifndef __BOOT__
+ int i;
+ /* Allow or disable access to all ports */
+ for(i=0; i<65536/8; i++) p->ioperm[i]=IOMASK;
+ p->ioperm[i] = 0xff; /* Last unused byte must have this value */
+#endif
+ p->dev = dev;
+ memset(p->vbase, 0, 0xa0000);
+ /* Set up a few registers */
+ p->cs = 0xc000; p->csbase = p->vbase + 0xc0000;
+ p->ss = 0x1000; p->ssbase = p->vbase + 0x10000;
+ p->eflags=0x200;
+ st_le16(&SP,0xfffc); p->eip=3;
+
+ p->dsbase = p->esbase = p->fsbase = p->gsbase = p->vbase;
+
+ /* Follow the PCI BIOS specification */
+ AH=dev->bus->number;
+ AL=dev->devfn;
+
+ /* All other registers are irrelevant except ES:DI which
+ * should point to a PnP installation check block. This
+ * is not yet implemented due to lack of references. */
+
+ /* Store a return address of 0xffff:0xffff as eyecatcher */
+ *(u_int *)(p->ssbase+ld_le16(&SP)) = UINT_MAX;
+
+ /* Interrupt for BIOS EGA services is 0xf000:0xf065 (int 0x10) */
+ st_le32((uint32_t *)p->vbase + 0x10, 0xf000f065);
+
+ /* Enable the ROM, read it and disable it immediately */
+ pci_bootloader_read_config_dword(dev, PCI_ROM_ADDRESS, &saved_rom);
+ pci_bootloader_write_config_dword(dev, PCI_ROM_ADDRESS, 0x000c0001);
+
+ /* Check that there is an Intel ROM. Should we also check that
+ * the first instruction is a jump (0xe9 or 0xeb) ?
+ */
+ signature = *(u_short *)(ptr_mem_map->isa_mem_base+0xc0000);
+ if (signature!=0x55aa) {
+ printf("bad signature: %04x.\n", signature);
+ return;
+ }
+ /* Allocate memory and copy the video rom to vbase+0xc0000; */
+ length = ptr_mem_map->isa_mem_base[0xc0002];
+ p->rom = palloc(length*512);
+ if (!p->rom) return;
+
+ for(dst=(u_int *) p->rom,
+ src=(volatile u_int *)(ptr_mem_map->isa_mem_base+0xc0000),
+ left = length*512/sizeof(u_int);
+ left--;
+ *dst++=*src++);
+
+ /* Disable the ROM and map the copy in virtual address space, note
+ * that the ROM has to be mapped as RAM since some BIOSes (at least
+ * Cirrus) perform write accesses to their own ROM. The reason seems
+ * to be that they check that they must execute from shadow RAM
+ * because accessing the ROM prevents accessing the video RAM
+ * according to comments in linux/arch/alpha/kernel/bios32.c.
+ */
+
+ pci_bootloader_write_config_dword(dev, PCI_ROM_ADDRESS, saved_rom);
+ vmap(p->vbase+0xc0000, (u_long)p->rom|PTE_RAM, length*512);
+
+ /* Now actually emulate the ROM init routine */
+ em86_enter(p);
+
+ /* Free the acquired resources */
+ vunmap(p->vbase+0xc0000);
+ pfree(p->rom);
+}
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/em86real.S b/bsps/powerpc/motorola_powerpc/bootloader/em86real.S
new file mode 100644
index 0000000000..e640541fe2
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/em86real.S
@@ -0,0 +1,4553 @@
+/*
+ * em86real.S
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+/* If the symbol __BOOT__ is defined, a slightly different version is
+ * generated to be compiled with the -m relocatable option
+ */
+
+#ifdef __BOOT__
+#include "bootldr.h"
+/* It is impossible to gather statistics in the boot version */
+#undef EIP_STATS
+#endif
+
+/*
+ *
+ * Given the size of this code, it deserves a few comments on how it works,
+ * and why it was implemented the way it is.
+ *
+ * The goal is to have a real mode i486SX emulator to initialize hardware,
+ * mostly graphics boards, by interpreting ROM BIOSes. The choice of a 486SX
+ * is logical since this is the lowest processor that PCI ROM BIOSes must run
+ * on.
+ *
+ * The goal of this emulator is not performance, but a small enough memory
+ * footprint to include it in a bootloader.
+ *
+ * It is actually likely to be comparable to a 25MHz 386DX on a 200MHz 603e !
+ * This is not as serious as it seems since most of the BIOS code performs
+ * a lot of accesses to I/O and non-cacheable memory spaces. For such
+ * instructions, the execution time is often dominated by bus accesses.
+ * Statistics of the code also show that it spends a large fraction of
+ * the time in loops waiting for vertical retrace, or programming one of the
+ * timers and waiting for the count to go down to zero. This type of loop
+ * runs emulated at the same speed as on a 5 GHz Pentium IV++ ;)
+ *
+ */
+
+/*
+ * Known bugs or differences with a real 486SX (real mode):
+ * - segment limits are not enforced (too costly)
+ * - xchg instructions with memory are not locked
+ * - lock prefixes are not implemented at all
+ * - long divides implemented but perhaps still buggy
+ * - miscellaneous system instructions not implemented
+ * (some probably cannot be implemented)
+ * - neither control nor debug registers are implemented for the time being
+ * (debug registers are impossible to implement at a reasonable cost)
+ */
+
+/* Code options, put them on the compiler command line */
+/* #define EIP_STATS */ /* EIP based profiling */
+/* #undef EIP_STATS */
+
+/*
+ * Implementation notes:
+ *
+ * A) flags emulation.
+ *
+ * The most important decisions when it comes to obtain a reasonable speed
+ * are related to how the EFLAGS register is emulated.
+ *
+ * Note: the code to set up flags is complex, but it is only seldom
+ * executed since cmp and test instructions use much faster flag evaluation
+ * paths. For example the overflow flag is almost only needed for pushf and
+ * int. Comparison results only involve (SF^OF) or (SF^OF)+ZF and the
+ * implementation is fast in this case.
+ *
+ * Rarely used flags: AC, NT and IOPL are kept in a memory EFLAGS image.
+ * All other flags are either kept explicitly in PPC cr (DF, IF, and TF) or
+ * lazily evaluated from the state of 4 registers called flags, result, op1,
+ * op2, and sometimes the cr itself. The emulation has been designed for
+ * minimal overhead for the common case where the flags are never used. With
+ * few exceptions, all instructions that set flags leave the result of the
+ * computation in a register called result, and operands are taken from op1
+ * and op2 registers. However a few instructions like cmp, test and bit tests
+ * (bt/btc/btr/bts/bsf/bsr) explicitly set cr bits to short circuit
+ * condition code evaluation of conditional instructions.
+ *
+ * As a very brief summary:
+ *
+ * - the result of the last flag setting operation is often either in the
+ * result register or in op2 after increment or decrement instructions
+ * because result and op1 may be needed to compute the carry.
+ *
+ * - compare instructions leave the result of the unsigned comparison
+ * in cr4 and of signed comparison in cr6. This means that:
+ * - cr4[0]=CF (short circuit for jc/jnc)
+ * - cr4[1]=~(CF+ZF) (short circuit for ja/jna)
+ * - cr6[0]=(OF^SF) (short circuit for jl/jnl)
+ * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
+ * - cr6[2]=ZF (short circuit for jz/jnz)
+ *
+ * - test instructions set flags in cr6 and clear overflow. This means that:
+ * - cr6[0]=SF=(SF^OF) (short circuit for jl/jnl/js/jns)
+ * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng)
+ * - cr6[2]=ZF (short circuit for jz/jnz)
+ *
+ * All flags may be lazily evaluated from several values kept in registers:
+ *
+ * Flag: Depends upon:
+ * OF result, op1, op2, flags[INCDEC_FIELD,SUBTRACTING,OF_STATE_MASK]
+ * SF result, op2, flags[INCDEC_FIELD,RES_SIZE]
+ * ZF result, op2, cr6[2], flags[INCDEC_FIELD,RES_SIZE,ZF_PROTECT]
+ * AF op1, op2, flags[INCDEC_FIELD,SUBTRACTING,CF_IN]
+ * PF result, op2, flags[INCDEC_FIELD]
+ * CF result, op1, flags[CF_STATE_MASK, CF_IN]
+ *
+ * The order of the fields in the flags register has been chosen so that a
+ * single rlwimi is necessary for common instructions that do not affect all
+ * flags. (See the code for inc/dec emulation.)
+ *
+ *
+ * B) opcodes and prefixes.
+ *
+ * The register called opcode holds in its low order 8 bits the opcode
+ * (second byte if the first byte is 0x0f). More precisely it holds the
+ * last byte fetched before the modrm byte or the immediate operand(s)
+ * of the instruction, if any. High order 24 bits are zero unless the
+ * instruction has prefixes. These higher order bits have the following
+ * meaning:
+ * 0x80000000 segment override prefix
+ * 0x00001000 repnz prefix (0xf2)
+ * 0x00000800 repz prefix (0xf3)
+ * 0x00000400 address size prefix (0x67)
+ * 0x00000200 operand size prefix (0x66)
+ * (bit 0x1000 and 0x800 cannot be set simultaneously)
+ *
+ * Therefore if there is a segment override the value will be very
+ * negative (between 0x80000000 and 0x800016ff); if there is no segment
+ * override, the value will be between 0 and 0x16ff. The reason for
+ * this choice will be understood in the next part.
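+ *
+ * For instance (an illustrative decoding, not from the original text):
+ * "es: rep movsw" would leave opcode = 0x800008a5 (0x80000000 override |
+ * 0x800 repz | 0xa5 movsw), a negative value, while a plain "movsw"
+ * leaves just 0x000000a5.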
+ *
+ * C) addressing mode description tables.
+ *
+ * the encoding of the modrm bytes (especially in 16 bit mode) is quite
+ * complex. Hence a table, indexed by the five useful bits of the modrm
+ * byte, is used to simplify decoding. Here is a description:
+ *
+ * bit mask meaning
+ * 0x80000000 use ss as default segment register
+ * 0x00004000 means that this addressing mode needs a base register
+ * (set for all entries except sib and displacement-only)
+ * 0x00002000 set if preceding is not set
+ * 0x00001000 set if an sib follows
+ * 0x00000700 base register to use (16 and 32 bit)
+ * 0x00000080 set in 32 bit addressing mode table, cleared in 16 bit
+ * (so extsb mask,entry; ori mask,mask,0xffff gives a mask)
+ * 0x00000070 kludge field, possible values are
+ * 0: 16 bit addressing mode without index
+ * 10: 32 bit addressing mode
+ * 60: 16 bit addressing mode with %si as index
+ * 70: 16 bit addressing mode with %di as index
+ *
+ * This convention leads to the following special values used to check for
+ * sib present and displacement-only, which happen to be the three lowest
+ * values in the table (unsigned):
+ * 0x00003090 sib follows (implies it is a 32 bit mode)
+ * 0x00002090 32 bit displacement-only
+ * 0x00002000 16 bit displacement-only
+ *
+ * This means that all entries are either very negative in the 0x80002000
+ * range if the segment defaults to ss or higher than 0x2000 if it defaults
+ * to ds. Combined with the value in opcode this gives the following table:
+ * opcode entry entry>opcode ? segment to use
+ * positive positive yes ds (default)
+ * negative positive yes overridden by prefix
+ * positive negative no ss
+ * negative negative yes overridden by prefix
+ *
+ * Hence a simple comparison allows checking for the need to override
+ * the current base with ss, i.e., when ss is the default base and the
+ * instruction has no override prefix.
+ *
+ * D) BUGS
+ *
+ * This software is obviously bug-free :-). Nevertheless, if you encounter
+ * an interesting feature, mail me a note, if possible with a detailed
+ * instruction example showing where and how it fails.
+ *
+ */
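+
+/* A worked illustration of the ss-default check from part C above (the
+ * same compare appears in the decoder below):
+ *
+ *	cmpw	cr2,r4,opcode	# entry > opcode ? ds default or override
+ *	...
+ *	bgtctr	cr2		# dispatch, keeping the current base
+ *	addi	base,ssb,0	# otherwise override the base with ss
+ *	bctr
+ */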
+
+/* Now the details of flag evaluation with the necessary macros */
+
+/* Alignment check is togglable so the system believes it is a 486, but
+CPUID is not, to avoid unnecessary complexities. However, alignment
+is actually never checked (real mode is CPL 0 anyway). */
+#define AC86 13 /* Can only be toggled */
+#define VM86 14 /* Not used for now */
+#define RF86 15 /* Not emulated precisely */
+/* Actually NT and IOPL are kept in memory */
+#define NT86 17
+#define IOPL86 18 /* Actually 18 and 19 */
+#define OF86 20
+#define DF86 21
+#define IF86 22
+#define TF86 23
+#define SF86 24
+#define ZF86 25
+#define AF86 27
+#define PF86 29
+#define CF86 31
+
+/* Where the less important flags are placed in PPC cr */
+#define RF 20 /* Suppress trap flag: cr5[0] */
+#define DF 21 /* Direction flag: cr5[1] */
+#define IF 22 /* Interrupt flag: cr5[2] */
+#define TF 23 /* Single step flag: cr5[3] */
+
+/* Now the flags which are frequently used */
+/*
+ * CF_IN is a copy of the input carry with PPC polarity,
+ * it is cleared for add, set for sub and cmp,
+ * equal to the x86 carry for adc and to its complement for sbb.
+ * it is used to evaluate AF and CF.
+ */
+#define CF_IN 0x80000000
+
+/* #define GET_CF_IN(dst) rlwinm dst,flags,1,0x01 */
+
+/* CF_IN_CR set in flags means that cr4[0] is a copy of carry bit */
+#define CF_IN_CR 0x40000000
+
+#define EVAL_CF andis. r3,flags,(CF_IN_CR)>>16; beql- _eval_cf
+
+/*
+ * CF_STATE tells how to compute the carry bit.
+ * NOTRESULT16 and NOTRESULT8 are never set explicitly,
+ * but they may happen after a cmc instruction.
+ */
+#define CF 16 /* cr4[0] */
+#define CF_LOCATION 0x30000000
+#define CF_ZERO 0x00000000
+#define CF_EXPLICIT 0x00000000
+#define CF_COMPLEMENT 0x08000000 /* Indeed a polarity bit */
+#define CF_STATE_MASK (CF_LOCATION|CF_COMPLEMENT)
+#define CF_VALUE 0x08000000
+#define CF_SET 0x08000000
+#define CF_RES32 0x10000000
+#define CF_NOTRES32 0x18000000
+#define CF_RES16 0x20000000
+#define CF_NOTRES16 0x28000000
+#define CF_RES8 0x30000000
+#define CF_NOTRES8 0x38000000
+
+#define CF_ADDL CF_RES32
+#define CF_SUBL CF_NOTRES32
+#define CF_ADDW CF_RES16
+#define CF_SUBW CF_RES16
+#define CF_ADDB CF_RES8
+#define CF_SUBB CF_RES8
+
+#define CF_ROTCNT(dst) rlwinm dst,flags,7,0x18
+#define CF_POL(dst,pos) rlwinm dst,flags,(36-pos)%32,pos,pos
+#define CF_POL_INSERT(dst,pos) \
+ rlwimi dst,flags,(36-pos)%32,pos,pos
+#define RES2CF(dst) rlwinm dst,result,8,7,15
+
+/*
+ * OF_STATE tells how to compute the overflow bit. When the low order bit
+ * is set (OF_EXPLICIT), it means that OF is the exclusive or of the
+ * two other bits. For the reason of this choice, see rotate instructions.
+ */
+#define OF 1 /* Only after EVAL_OF */
+#define OF_STATE_MASK 0x07000000
+#define OF_INCDEC 0x00000000
+#define OF_EXPLICIT 0x01000000
+#define OF_ZERO 0x01000000
+#define OF_VALUE 0x04000000
+#define OF_SET 0x04000000
+#define OF_ONE 0x05000000
+#define OF_XOR 0x06000000
+#define OF_ARITHL 0x06000000
+#define OF_ARITHW 0x02000000
+#define OF_ARITHB 0x04000000
+
+#define EVAL_OF rlwinm. r3,flags,6,0,1; bngl+ _eval_of; andis. r3,flags,OF_VALUE>>16
+
+/* See _eval_of to see how this can be used */
+#define OF_ROTCNT(dst) rlwinm dst,flags,10,0x1c
+
+/*
+ * SIGNED_IN_CR means that cr6 is set as after a signed compare:
+ * - cr6[0] is SF^OF for jl/jnl/setl/setnl...
+ * - cr6[1] is ~((SF^OF)+ZF) for jg/jng/setg/setng...
+ * - cr6[2] is ZF (ZF_IN_CR is always set if this bit is set)
+ */
+#define SLT 24 /* cr6[0], signed less than */
+#define SGT 25 /* cr6[1], signed greater than */
+#define SIGNED_IN_CR 0x00800000
+
+#define EVAL_SIGNED andis. r3,flags,SIGNED_IN_CR>>16; beql- _eval_signed
+
+/*
+ * ABOVE_IN_CR means that cr4 is set as after an unsigned compare:
+ * - cr4[0] is CF (CF_IN_CR is also set)
+ * - cr4[1] is ~(CF+ZF) (ZF_IN_CR is also set)
+ */
+#define ABOVE 17 /* cr4[1] */
+#define ABOVE_IN_CR 0x00400000
+
+#define EVAL_ABOVE andis. r3,flags,ABOVE_IN_CR>>16; beql- _eval_above
+
+/* SF_IN_CR means cr6[0] is a copy of SF. It implies ZF_IN_CR is also set */
+#define SF 24 /* cr6[0] */
+#define SF_IN_CR 0x00200000
+
+#define EVAL_SF andis. r3,flags,SF_IN_CR>>16; beql- _eval_sf_zf
+
+/* ZF_IN_CR means cr6[2] is a copy of ZF. */
+#define ZF 26
+#define ZF_IN_CR 0x00100000
+
+#define EVAL_ZF andis. r3,flags,ZF_IN_CR>>16; beql- _eval_sf_zf
+#define ZF2ZF86(s,d) rlwimi d,s,ZF-ZF86,ZF86,ZF86
+#define ZF862ZF(reg) rlwimi reg,reg,32+ZF86-ZF,ZF,ZF
+
+/*
+ * ZF_PROTECT means cr6[2] is the only valid value for ZF. This is necessary
+ * because some infrequent instructions may leave SF and ZF in an apparently
+ * inconsistent state (both set): sahf, popf and the few (not implemented)
+ * instructions that only affect ZF.
+ */
+#define ZF_PROTECT 0x00080000
+
+/* The parity is always evaluated when it is needed */
+#define PF 0 /* Only after EVAL_PF */
+#define EVAL_PF bl _eval_pf
+
+/* This field gives the shift amount to use to evaluate SF
+ and ZF when ZF_PROTECT is not set */
+#define RES_SIZE_MASK 0x00060000
+#define RESL 0x00000000
+#define RESW 0x00040000
+#define RESB 0x00060000
+
+#define RES_SHIFT(dst) rlwinm dst,flags,18,0x18
+
+/* SUBTRACTING is set if the last flag setting instruction was sub/sbb/cmp,
+ used to evaluate OF and AF */
+#define SUBTRACTING 0x00010000
+
+#define GET_ADDSUB(dst) rlwinm dst,flags,16,0x01
+
+/* rotate (rcl/rcr/rol/ror) affect CF and OF but not other flags */
+#define ROTATE_MASK (CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR|OF_STATE_MASK|SIGNED_IN_CR)
+#define ROTATE_FLAGS rlwimi flags,one,24,ROTATE_MASK
+
+/*
+ * INCDEC_FIELD has at most one bit set when the last flag setting instruction
+ * was either inc or dec (which do not affect the carry). When one of these
+ * bits is set, it affects the way OF, SF, ZF, AF, and PF are evaluated.
+ */
+#define INCDEC_FIELD 0x0000ff00
+
+#define DECB_SHIFT 8
+#define INCB_SHIFT 9
+#define DECW_SHIFT 10
+#define INCW_SHIFT 11
+#define DECL_SHIFT 14
+#define INCL_SHIFT 15
+
+#define INCDEC_MASK (OF_STATE_MASK|SIGNED_IN_CR|ABOVE_IN_CR|SF_IN_CR|\
+ ZF_IN_CR|ZF_PROTECT|RES_SIZE_MASK|SUBTRACTING|\
+ INCDEC_FIELD)
+/* Operations to perform to tell where the flags are after inc or dec */
+#define INC_FLAGS(BWL) rlwimi flags,one,INC##BWL##_SHIFT,INCDEC_MASK
+#define DEC_FLAGS(BWL) rlwimi flags,one,DEC##BWL##_SHIFT,INCDEC_MASK
+
+/* How the flags are set after arithmetic operations */
+#define FLAGS_ADD(BWL) (CF_ADD##BWL|OF_ARITH##BWL|RES##BWL)
+#define FLAGS_SBB(BWL) (CF_SUB##BWL|OF_ARITH##BWL|RES##BWL|SUBTRACTING)
+#define FLAGS_SUB(BWL) FLAGS_SBB(BWL)|CF_IN
+#define FLAGS_CMP(BWL) FLAGS_SUB(BWL)|ZF_IN_CR|CF_IN_CR|SIGNED_IN_CR|ABOVE_IN_CR
+
+/* How the flags are set after logical operations */
+#define FLAGS_LOG(BWL) (CF_ZERO|OF_ZERO|RES##BWL)
+#define FLAGS_TEST(BWL) FLAGS_LOG(BWL)|ZF_IN_CR|SIGNED_IN_CR|SF_IN_CR
+
+/* How the flags are set after bt/btc/btr/bts. */
+#define FLAGS_BTEST CF_IN_CR|CF_ADDL|OF_ZERO|RESL
+
+/* How the flags are set after bsf/bsr. */
+#define FLAGS_BSRCH(WL) CF_ZERO|OF_ZERO|RES##WL|ZF_IN_CR
+
+/* How the flags are set after logical right shifts */
+#define FLAGS_SHR(BWL) (CF_EXPLICIT|OF_ARITH##BWL|RES##BWL)
+
+/* How the flags are set after double length shifts */
+#define FLAGS_DBLSH(WL) (CF_EXPLICIT|OF_ARITH##WL|RES##WL)
+
+/* How the flags are set after multiplies */
+#define FLAGS_MUL (CF_EXPLICIT|OF_EXPLICIT)
+
+#define SET_FLAGS(fl) lis flags,(fl)>>16
+#define ADD_FLAGS(fl) addis flags,flags,(fl)>>16
+
+/*
+ * We are always off by one when compared with Intel's eip; this shortens
+ * the code by allowing the next byte to be loaded with lbzu x,1(eip). The register
+ * called eip actually contains csbase+eip, and thus should be called lip
+ * for linear ip.
+ */
+
+/*
+ * Reason codes passed to the C part of the emulator, this includes all
+ * instructions which may change the current code segment. These definitions
+ * will soon go into a separate include file. Codes 0 to 255 correspond
+ * directly to the interrupt/trap that has to be generated.
+ */
+
+#define code_divide_err 0
+#define code_trap 1
+#define code_int3 3
+#define code_into 4
+#define code_bound 5
+#define code_ud 6
+#define code_dna 7 /* FPU not available */
+
+#define code_iretw 256 /* Interrupt returns */
+#define code_iretl 257
+#define code_lcallw 258 /* Far calls and jumps */
+#define code_lcalll 259
+#define code_ljmpw 260
+#define code_ljmpl 261
+#define code_lretw 262 /* Far returns */
+#define code_lretl 263
+#define code_softint 264 /* int $xx */
+#define code_lock 265 /* Lock prefix */
+/* Codes 1024 to 2047 are used for I/O port access instructions:
+ - The three LSB define the port size (1, 2 or 4)
+ - bit of weight 512 means out if set, in if clear
+ - bit of weight 256 means ins/outs if set, in/out if clear
+ - bit of weight 128 means use 32 bit addresses if set, 16 bit if clear
+ (only used for ins/outs instructions, always clear for in/out)
+ */
+#define code_inb 1024+1
+#define code_inw 1024+2
+#define code_inl 1024+4
+#define code_outb 1024+512+1
+#define code_outw 1024+512+2
+#define code_outl 1024+512+4
+#define code_insb_a16 1024+256+1
+#define code_insw_a16 1024+256+2
+#define code_insl_a16 1024+256+4
+#define code_outsb_a16 1024+512+256+1
+#define code_outsw_a16 1024+512+256+2
+#define code_outsl_a16 1024+512+256+4
+#define code_insb_a32 1024+256+128+1
+#define code_insw_a32 1024+256+128+2
+#define code_insl_a32 1024+256+128+4
+#define code_outsb_a32 1024+512+256+128+1
+#define code_outsw_a32 1024+512+256+128+2
+#define code_outsl_a32 1024+512+256+128+4
+
+#define state 31
+/* r31 (state) is a pointer to a structure describing the emulated x86
+processor; its layout is the following:
+
+first the general purpose registers, they are in little endian byte order
+
+offset name
+
+ 0 eax/ax/al
+ 1 ah
+ 4 ecx/cx/cl
+ 5 ch
+ 8 edx/dx/dl
+ 9 dh
+ 12 ebx/bx/bl
+ 13 bh
+ 16 esp/sp
+ 20 ebp/bp
+ 24 esi/si
+ 28 edi/di
+*/
+
+#define AL 0
+#define AX 0
+#define EAX 0
+#define AH 1
+#define CL 4
+#define CX 4
+#define ECX 4
+#define DX 8
+#define EDX 8
+#define BX 12
+#define EBX 12
+#define SP 16
+#define ESP 16
+#define BP 20
+#define EBP 20
+#define SI 24
+#define ESI 24
+#define DI 28
+#define EDI 28
+
+/*
+then the rest of the machine state, big endian!
+
+offset name
+
+ 32 essel segment register selectors (values)
+ 36 cssel
+ 40 sssel
+ 44 dssel
+ 48 fssel
+ 52 gssel
+ 56 eipimg true eip (register named eip is csbase+eip)
+ 60 eflags eip and eflags only valid when C code running !
+ 64 esbase segment registers bases
+ 68 csbase
+ 72 ssbase
+ 76 dsbase
+ 80 fsbase
+ 84 gsbase
+ 88 iobase For I/O instructions, I/O space virtual base
+ 92 ioperm I/O permission bitmap pointer
+ 96 reason Reason code when calling external emulator
+ 100 nexteip eip past instruction for external emulator
+ 104 parm1 parameter for external emulator
+ 108 parm2 parameter for external emulator
+ 112 _opcode current opcode register for external emulator
+ 116 _base segment register base for external emulator
+ 120 _offset instruction operand offset
+ More internal state was dumped here for debugging in first versions
+
+ 128 vbase where the 1Mb memory is mapped
+ 132 cntimg instruction counter
+ 136 scratch
+ 192 eipstat array of 32k unsigned long pairs for eip stats
+*/
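+
+/* For example (illustrative), reading the emulated eax and the ds base:
+ *
+ *	li	r6,EAX
+ *	lwbrx	r3,state,r6		# GPRs are stored little endian
+ *	lwz	r4,dsbase(state)	# the rest is big endian
+ */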
+
+#define essel 32
+#define cssel 36
+#define sssel 40
+#define dssel 44
+#define fssel 48
+#define gssel 52
+#define eipimg 56
+#define eflags 60
+#define esbase 64
+#define csbase 68
+#define ssbase 72
+#define dsbase 76
+#define fsbase 80
+#define gsbase 84
+#define iobase 88
+#define ioperm 92
+#define reason 96
+#define nexteip 100
+#define parm1 104
+#define parm2 108
+#define _opcode 112
+#define _base 116
+#define _offset 120
+#define vbase 128
+#define cntimg 132
+#ifdef EIP_STATS
+#define eipstat 192
+#endif
+/* Global registers */
+
+/* Some segment register bases are permanently kept in registers since they
+are often used: these are csb, esb and ssb because they are
+required for jumps, string instructions, and pushes/pops/calls/rets.
+dsbase is not kept in a register but loaded from memory to allow somewhat
+more parallelism in the main emulation loop.
+*/
+
+#define one 30 /* Constant one, so pervasive */
+#define ssb 29
+#define csb 28
+#define esb 27
+#define eip 26 /* That one is indeed csbase+(e)ip-1 */
+#define result 25 /* For the use of result, op1, op2 */
+#define op1 24 /* see the section on flag emulation */
+#define op2 23
+#define opbase 22 /* default opcode table */
+#define flags 21 /* See earlier description */
+#define opcode 20 /* Opcode */
+#define opreg 19 /* Opcode extension/register number */
+/* base is reloaded with the base of the ds segment at the beginning of
+every instruction; it is modified by segment override prefixes, when
+the default base segment is ss, or when the modrm byte specifies a
+register operand */
+#define base 18 /* Instruction's operand segment base */
+#define offset 17 /* Instruction's memory operand offset */
+/* used to address a table telling how to decode the addressing mode
+specified by the modrm byte */
+#define adbase 16 /* addressing mode table */
+/* The following registers are used only as dedicated temporaries during decoding;
+they are free for use during emulation */
+/*
+ * ceip (current eip) is only in use when we call the external emulator for
+ * instructions that fault. Note that it is forbidden to change flags before
+ * the check for the fault happens (divide by zero...) ! ceip is also used
+ * when measuring timing.
+ */
+#define ceip 15
+
+/* A register used to measure timing information (when enabled) */
+#ifdef EIP_STATS
+#define tstamp 14
+#endif
+
+#define count 12 /* Instruction counter. */
+
+#define r0 0
+#define r1 1 /* PPC Stack pointer. */
+#define r3 3
+#define r4 4
+#define r5 5
+#define r6 6
+#define r7 7
+
+/* Macros to read code stream */
+#define NEXTBYTE(dest) lbzu dest,1(eip)
+#define NEXTWORD(dest) lhbrx dest,eip,one; la eip,2(eip)
+#define NEXTDWORD(dest) lwbrx dest,eip,one; la eip,4(eip)
+#define NEXT b nop
+#define GOTNEXT b gotopcode
+
+#ifdef __BOOT__
+ START_GOT
+ GOT_ENTRY(_jtables)
+ GOT_ENTRY(jtab_www)
+ GOT_ENTRY(adtable)
+ END_GOT
+#else
+ .text
+#endif
+ .align 2
+ .global em86_enter
+ .type em86_enter,@function
+em86_enter: stwu r1,-96(r1) # allocate stack
+ mflr r0
+ stmw 14,24(r1)
+ mfcr r4
+ stw r0,100(r1)
+ mr state,r3
+ stw r4,20(r1)
+#ifdef __BOOT__
+/* We need this since r30 is the default GOT pointer */
+#define r30 30
+ GET_GOT
+/* The relocation of these tables is explicit, this could be done
+ * automatically with fixups but would add more than 8kb in the fixup tables.
+ */
+ lwz r3,GOT(_jtables)
+ lwz r4,_endjtables-_jtables(r3)
+ sub. r4,r3,r4
+ beq+ 1f
+ li r0,((_endjtables-_jtables)>>2)+1
+ addi r3,r3,-4
+ mtctr r0
+0: lwzu r5,4(r3)
+ add r5,r5,r4
+ stw r5,0(r3)
+ bdnz 0b
+1: lwz adbase,GOT(adtable)
+ lwz opbase,GOT(jtab_www)
+/* Now r30 is only used as constant 1 */
+#undef r30
+ li one,1 # pervasive constant
+#else
+ lis opbase,jtab_www@ha
+ lis adbase,adtable@ha
+ li one,1 # pervasive constant
+ addi opbase,opbase,jtab_www@l
+ addi adbase,adbase,adtable@l
+#ifdef EIP_STATS
+ li ceip,0
+ mftb tstamp
+#endif
+#endif
+/* We branch back here when a call to an external function tells us to resume */
+restart: lwz r3,eflags(state)
+ lis flags,(OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR)>>16
+ lwz csb,csbase(state)
+ extsb result,r3 # SF/PF
+ rlwinm op1,r3,31,0x08 # AF
+ lwz eip,eipimg(state)
+ ZF862ZF(r3) # cr6
+ addi op2,op1,0 # AF
+ lwz ssb,ssbase(state)
+ rlwimi flags,r3,15,OF_VALUE # OF
+ rlwimi r3,r3,32+RF86-RF,RF,RF # RF
+ lwz esb,esbase(state)
+ ori result,result,0xfb # PF
+ mtcrf 0x06,r3 # RF/DF/IF/TF/SF/ZF
+ lbzux opcode,eip,csb
+ rlwimi flags,r3,27,CF_VALUE # CF
+ xori result,result,0xff # PF
+ lwz count,cntimg(state)
+ GOTNEXT # start the emulator
+
+/* Now return */
+exit: lwz r0,100(r1)
+ lwz r4,20(r1)
+ mtlr r0
+ lmw 14,24(r1)
+ mtcr r4
+ addi r1,r1,96
+ blr
+
+trap: crmove 0,RF
+ crclr RF
+ bt- 0,resume
+ sub ceip,eip,csb
+ li r3,code_trap
+complex: addi eip,eip,1
+ stw r3,reason(state)
+ sub eip,eip,csb
+ stw op1,240(state)
+ stw op2,244(state)
+ stw result,248(state)
+ stw flags,252(state)
+ stw r4,parm1(state)
+ stw r5,parm2(state)
+ stw opcode,_opcode(state)
+ bl _eval_flags
+ stw base,_base(state)
+ stw eip,nexteip(state)
+ stw r3,eflags(state)
+ mr r3,state
+ stw offset,_offset(state)
+ stw ceip,eipimg(state)
+ stw count,cntimg(state)
+ bl em86_trap
+ cmpwi r3,0
+ bne exit
+ b restart
+
+/* Main loop */
+/*
+ * The two LSB of each entry in the main table mean the following:
+ * 00: indirect opcode: modrm follows and the three middle bits are an
+ * opcode extension. The entry points to another jump table.
+ * 01: direct instruction, branch directly to the routine.
+ * 10: modrm specifies byte size memory and register operands.
+ * 11: modrm specifies word/long memory and register operands.
+ *
+ * The modrm byte, if present, is always loaded in r7.
+ *
+ * Note: most "mr x,y" instructions have been replaced by "addi x,y,0" since
+ * the latter can be executed in the second integer unit on 603e.
+ */
+
+/*
+ * This code is a very good example of absolutely unmaintainable code.
+ * It was actually much easier to write than it is to understand !
+ * If my computations are right, the maximum path length from fetching
+ * the opcode to exiting to the actual instruction execution is
+ * 46 instructions (for non-prefixed, single byte opcode instructions).
+ *
+ */
+ .align 5
+#ifdef EIP_STATS
+nop: NEXTBYTE(opcode)
+gotopcode: slwi r3,opcode,2
+ bt- TF,trap
+resume: lwzx r4,opbase,r3
+ addi r5,state,eipstat+4
+ clrlslwi r6,ceip,17,3
+ mtctr r4
+ lwzux r7,r5,r6
+ slwi. r0,r4,30 # two lsb of table entry
+ sub r7,r7,tstamp
+ lwz r6,-4(r5)
+ mftb tstamp
+ addi r6,r6,1
+ sub ceip,eip,csb
+ stw r6,-4(r5)
+ add r7,r7,tstamp
+ lwz base,dsbase(state)
+ stw r7,0(r5)
+#else
+nop: NEXTBYTE(opcode)
+gotopcode: slwi r3,opcode,2
+ bt- TF,trap
+resume: lwzx r4,opbase,r3
+ sub ceip,eip,csb
+ mtctr r4
+ slwi. r0,r4,30 # two lsb of table entry
+ lwz base,dsbase(state)
+ addi count,count,1
+#endif
+ bgtctr- # for instructions without modrm
+
+/* modrm byte present */
+ NEXTBYTE(r7) # modrm byte
+ cmplwi cr1,r7,192
+ rlwinm opreg,r7,31,0x1c
+ beq- cr0,8f # extended opcode
+/* modrm with middle 3 bits specifying a register (non prefixed) */
+ rlwinm r0,r4,3,0x8
+ li r4,0x1c0d
+ rlwimi opreg,r7,27,0x01
+ srw r4,r4,r0
+ and opreg,opreg,r4
+ blt cr1,9f
+/* modrm with 2 register operands */
+1: rlwinm offset,r7,2,0x1c
+ addi base,state,0
+ rlwimi offset,r7,30,0x01
+ and offset,offset,r4
+ bctr
+
+/* Prefixes: first segment overrides */
+ .align 4
+_es: NEXTBYTE(r7); addi base,esb,0
+ oris opcode,opcode,0x8000; b 2f
+_cs: NEXTBYTE(r7); addi base,csb,0
+ oris opcode,opcode,0x8000; b 2f
+_fs: NEXTBYTE(r7); lwz base,fsbase(state)
+ oris opcode,opcode,0x8000; b 2f
+_gs: NEXTBYTE(r7); lwz base,gsbase(state)
+ oris opcode,opcode,0x8000; b 2f
+_ss: NEXTBYTE(r7); addi base,ssb,0
+ oris opcode,opcode,0x8000; b 2f
+_ds: NEXTBYTE(r7)
+ oris opcode,opcode,0x8000; b 2f
+
+/* Lock (unimplemented) and repeat prefixes */
+_lock: li r3,code_lock; b complex
+_repnz: NEXTBYTE(r7); rlwimi opcode,one,12,0x1800; b 2f
+_repz: NEXTBYTE(r7); rlwimi opcode,one,11,0x1800; b 2f
+
+/* Operand and address size prefixes */
+ .align 4
+_opsize: NEXTBYTE(r7); ori opcode,opcode,0x200
+ rlwinm r3,opcode,2,0x1ffc; b 2f
+_adsize: NEXTBYTE(r7); ori opcode,opcode,0x400
+ rlwinm r3,opcode,2,0x1ffc; b 2f
+
+_twobytes: NEXTBYTE(r7); addi r3,r3,0x400
+2: rlwimi r3,r7,2,0x3fc
+ lwzx r4,opbase,r3
+ rlwimi opcode,r7,0,0xff
+ mtctr r4
+ slwi. r0,r4,30
+ bgtctr- # direct instruction
+/* modrm byte in a prefixed instruction */
+ NEXTBYTE(r7) # modrm byte
+ cmpwi cr1,r7,192
+ rlwinm opreg,r7,31,0x1c
+ beq- 6f
+/* modrm with middle 3 bits specifying a register (prefixed) */
+ rlwinm r0,r4,3,0x8
+ li r4,0x1c0d
+ rlwimi opreg,r7,27,0x01
+ srw r4,r4,r0
+ and opreg,opreg,r4
+ bnl cr1,1b # 2 register operands
+/* modrm specifying memory with prefix */
+3: rlwinm r3,r3,27,0xff80
+ rlwimi adbase,r7,2,0x1c
+ extsh r3,r3
+ rlwimi r3,r7,31,0x60
+ lwzx r4,r3,adbase
+ cmpwi cr1,r4,0x3090
+ bnl+ cr1,10f
+/* displacement only addressing modes */
+4: cmpwi r4,0x2000
+ bne 5f
+ NEXTWORD(offset)
+ bctr
+5: NEXTDWORD(offset)
+ bctr
+/* modrm with opcode extension (prefixed) */
+6: lwzx r4,r4,opreg
+ mtctr r4
+ blt cr1,3b
+/* modrm with opcode extension and register operand */
+7: rlwinm offset,r7,2,0x1c
+ addi base,state,0
+ rlwinm r0,r4,3,0x8
+ li r4,0x1c0d
+ rlwimi offset,r7,30,0x01
+ srw r4,r4,r0
+ and offset,offset,r4
+ bctr
+/* modrm with opcode extension (non prefixed) */
+8: lwzx r4,r4,opreg
+ mtctr r4
+/* FIXME ? We continue fetching even if the opcode extension is undefined.
+ * It shouldn't do any harm on real mode emulation anyway, and for ROM
+ * BIOS emulation, we are supposed to read valid code.
+ */
+ bnl cr1,7b
+/* modrm specifying memory without prefix */
+9: rlwimi adbase,r7,2,0x1c # memory addressing mode computation
+ rlwinm r3,r7,31,0x60
+ lwzx r4,r3,adbase
+ cmplwi cr1,r4,0x3090
+ blt- cr1,4b # displacement only addressing mode
+10: rlwinm. r0,r7,24,0,1 # three cases distinguished
+ beq- cr1,15f # an sib follows
+ rlwinm r3,r4,30,0x1c # 16bit/32bit/%si index/%di index
+ cmpwi cr1,r3,8 # set cr1 as early as possible
+ rlwinm r6,r4,26,0x1c # base register
+ lwbrx offset,state,r6 # load the base register
+ beq cr0,14f # no displacement
+ cmpw cr2,r4,opcode # check for ss as default base
+ bgt cr0,12f # byte offset
+ beq cr1,11f # 32 bit displacement
+ NEXTWORD(r5) # 16 bit displacement
+ bgt cr1,13f # d16(base,index)
+/* d16(base) */
+ add offset,offset,r5
+ clrlwi offset,offset,16
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* d32(base) */
+11: NEXTDWORD(r5)
+ add offset,offset,r5
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* 8 bit displacement */
+12: NEXTBYTE(r5)
+ extsb r5,r5
+ bgt cr1,13f
+/* d8(base) */
+ extsb r6,r4
+ add offset,offset,r5
+ ori r6,r6,0xffff
+ and offset,offset,r6
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* d8(base,index) and d16(base,index) share this code ! */
+13: lhbrx r3,state,r3
+ add offset,offset,r5
+ add offset,offset,r3
+ clrlwi offset,offset,16
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* no displacement: only indexed modes may use ss as default base */
+14: beqctr cr1 # 32 bit register indirect
+ clrlwi offset,offset,16
+ bltctr cr1 # 16 bit register indirect
+/* (base,index) */
+ lhbrx r3,state,r3 # 16 bit [{bp,bx}+{si,di}]
+ cmpw cr2,r4,opcode # check for ss as default base
+ add offset,offset,r3
+	clrlwi	offset,offset,16	# modulo 64k, as in the other 16 bit modes
+ bgtctr+ cr2
+ addi base,ssb,0
+ bctr
+/* sib modes; note that the size of the offset can be known from cr0 */
+15: NEXTBYTE(r7) # get sib
+ rlwinm r3,r7,31,0x1c # index
+ rlwinm offset,r7,2,0x1c # base
+ cmpwi cr1,r3,ESP # has index ?
+ bne cr0,18f # base+d8/d32
+ cmpwi offset,EBP
+ beq 17f # d32(,index,scale)
+ xori r4,one,0xcc01 # build 0x0000cc00
+ rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000
+ lwbrx offset,state,offset
+ cmpw cr2,r4,opcode # use ss ?
+ beq- cr1,16f # no index
+/* (base,index,scale) */
+ lwbrx r3,state,r3
+ srwi r6,r7,6
+ slw r3,r3,r6
+ add offset,offset,r3
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* (base), in practice only (%esp) is coded this way */
+16: bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* d32(,index,scale) */
+17: NEXTDWORD(offset)
+ beqctr- cr1 # no index: very unlikely
+ lwbrx r3,state,r3
+ srwi r6,r7,6
+ slw r3,r3,r6
+ add offset,offset,r3
+ bctr
+/* 8 or 32 bit displacement */
+18: xori r4,one,0xcc01 # build 0x0000cc00
+ rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000
+ lwbrx offset,state,offset
+ cmpw cr2,r4,opcode # use ss ?
+ bgt cr0,20f # 8 bit offset
+/* 32 bit displacement */
+ NEXTDWORD(r5)
+ beq- cr1,21f
+/* d(base,index,scale) */
+19: lwbrx r3,state,r3
+ add offset,offset,r5
+ add offset,offset,r3
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* 8 bit displacement */
+20: NEXTBYTE(r5)
+ extsb r5,r5
+ bne+ cr1,19b
+/* d(base), in practice base is %esp */
+21: add offset,offset,r5
+ bgtctr- cr2
+ addi base,ssb,0
+ bctr
+
+/*
+ * Flag evaluation subroutines: they have not been written for performance
+ * since they are not often used in practice. The rule of the game was to
+ * write them with as few branches as possible.
+ * The first routines evaluate either one or two (ZF and SF simultaneously)
+ * flags and do not use r0 and r7.
+ * The more complex routines (_eval_above, _eval_signed and _eval_flags)
+ * call the former ones, using r0 as a return address save register and
+ * r7 as a safe temporary.
+ */
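+
+/* As an illustration of this lazy scheme: an 'add' handler below merely
+ * executes SET_FLAGS(FLAGS_ADD(W)) and leaves op1, op2 and result in their
+ * registers; nothing else is computed. Only if a later jz/js/jp actually
+ * queries a flag does one of the _eval_* routines reconstruct it from those
+ * three values, e.g. ZF = (result == 0), SF = MSB of the sized result, and
+ * CF = (result < op1 unsigned) for an add.
+ */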
+
+/*
+ * _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
+ * and protected because it is possible, although it is exceptional, to have
+ * SF and ZF set at the same time after a few instructions which may leave the
+ * flags in this apparently inconsistent state: sahf, popf, iret and the few
+ * (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
+ * cmpxchg8b). This also solves the obscure case of ZF set and PF clear.
+ * On return: SF=cr6[0], ZF=cr6[2].
+ */
+
+_eval_sf_zf: andis. r5,flags,ZF_PROTECT>>16
+ rlwinm r3,flags,0,INCDEC_FIELD
+ RES_SHIFT(r4)
+ cntlzw r3,r3
+ slw r4,result,r4
+	srwi	r5,r3,5		# ? use result : use op2
+ rlwinm r3,r3,2,0x18
+ oris flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16
+ neg r5,r5 # mux result/op2
+ slw r3,op2,r3
+ and r4,r4,r5
+ andc r3,r3,r5
+ xoris flags,flags,(SIGNED_IN_CR)>>16
+ bne- 1f # 12 instructions between set
+ or r3,r3,r4 # and test, good for folding
+ cmpwi cr6,r3,0
+ blr
+1: or. r3,r3,r4
+ crmove SF,0
+ blr
+
+/*
+ * _eval_cf may be called at any time, no other flag is affected.
+ * On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8.
+ */
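+
+/* Note on polarity: the PowerPC carry after a subtraction (subfc/subfe) is
+ * the complement of the x86 borrow, CA=1 meaning no borrow. This is why the
+ * code below and carryforadc/carryforsbb need the CF_POL/CF_ROTCNT machinery
+ * to normalize whichever carry representation the last instruction left.
+ */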
+_eval_cf: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # get 8 or 16 bit carry
+ subfe r3,result,op1 # generate PPC carry for
+ CF_ROTCNT(r5) # preceding operation
+ addze r3,r4 # put carry into LSB
+ CF_POL(r4,23) # polarity & 0x100
+ oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
+ rlwnm r3,r3,r5,23,23 # shift carry there
+ xor r3,r3,r4 # CF <<8
+ xoris flags,flags,(ABOVE_IN_CR)>>16
+ cmplw cr4,one,r3 # sets cr4[0]
+ blr
+
+/*
+ * eval_of returns the overflow flag in OF_STATE field, which will be
+ * either 001 (OF clear) or 101 (OF set); it is only called when the two
+ * low order bits of OF_STATE are not 01 (otherwise it will work but
+ * it is an elaborate variant of a nop with a few registers destroyed)
+ * The code multiplexes several sources in a branchless way, and was fun to write.
+ */
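+
+/* The rule being multiplexed is the usual one: for add, OF is set when both
+ * operands have the same sign and the result has the other one; for sub, op2
+ * is considered with its sign flipped. E.g. on a 16 bit add,
+ * 0x7fff+0x0001=0x8000 sets OF. The eqv/andc pair below computes exactly
+ * (op1[]==op2[]) and not (result[]==op1[]), bit by bit, and the MSB of the
+ * sized operand is then extracted with OF_ROTCNT.
+ */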
+_eval_of: GET_ADDSUB(r4) # 0(add)/1(sub)
+ rlwinm r3,flags,0,INCDEC_FIELD
+ neg r4,r4 # 0(add)/-1(sub)
+ eqv r5,result,op1 # result[]==op1[] (bit by bit)
+ cntlzw r3,r3 # inc/dec
+ xor r4,r4,op2 # true sign of op2
+ oris r5,r5,0x0808 # bits to clear
+ clrlwi r6,r3,31 # 0(inc)/1(dec)
+ eqv r4,r4,op1 # op1[]==op2[] (bit by bit)
+ add r6,op2,r6 # add 1 if dec
+ rlwinm r3,r3,2,0x18 # incdec_shift
+ andc r4,r4,r5 # arithmetic overflow
+ slw r3,r6,r3 # shifted inc/dec result
+ addis r3,r3,0x8000 # compare with 0x80000000
+ ori r4,r4,0x0808 # bits to set
+ cntlzw r3,r3 # 32 if inc/dec overflow
+ OF_ROTCNT(r6)
+ rlwimi r4,r3,18,0x00800000 # insert inc/dec overflow
+ rlwimi flags,one,24,OF_STATE_MASK
+ rlwnm r3,r4,r6,8,8 # get field
+ rlwimi flags,r3,3,OF_VALUE # insert OF
+ blr
+
+/*
+ * _eval_pf will always be called when needed (complex but infrequent);
+ * there are a few quirks for a branchless solution.
+ * On return: PF=cr0[0], PF=MSB(r3)
+ */
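+
+/* The constant 0x9669 used below is a 16 entry parity table read MSB first:
+ * bit n (counting from the MSB) holds the x86 parity of the nibble value n,
+ * e.g. bits 0, 3, 5 and 6 are set because 0x0, 0x3, 0x5 and 0x6 have an even
+ * number of one bits. The byte parity is looked up after folding the two
+ * nibbles together with the rotate/xor below.
+ */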
+_eval_pf: rlwinm r3,flags,0,INCDEC_FIELD
+ rotrwi r4,op2,4 # from inc/dec
+ rotrwi r5,result,4 # from result
+ cntlzw r3,r3 # use result if 32
+ xor r4,r4,op2
+ xor r5,r5,result
+ rlwinm r3,r3,26,0,0 # 32 becomes 0x80000000
+ clrlwi r4,r4,28
+ lis r6,0x9669 # constant to shift
+ clrlwi r5,r5,28
+ rlwnm r4,r6,r4,0,0 # parity from inc/dec
+ rlwnm r5,r6,r5,0,0 # parity from result
+ andc r4,r4,r3 # select which one
+ and r5,r5,r3
+ add. r3,r4,r5 # and test to simplify
+ blr # returns in r3 and cr0 set.
+
+/*
+ * _eval_af will always be called when needed (complex but infrequent):
+ * - if after inc, af is set when 4 low order bits of op1 are 0
+ * - if after dec, af is set when 4 low order bits of op1 are 1
+ * (or 0 after adding 1 as implemented here)
+ * - if after add/sub/adc/sbb/cmp af is set from the sum of the 4 LSB of op1
+ *   and the 4 LSB of op2 (possibly complemented) plus carry in.
+ * - other instructions leave AF undefined so the returned value is irrelevant.
+ * Returned value must be masked with 0x10, since all other bits are undefined.
+ * The branchless code is perhaps not the most efficient, but it is quite parallel.
+ */
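+
+/* E.g. 0x0f + 0x01: the 4 LSB of op1 plus the 4 LSB of op2 carry out of
+ * bit 3, so AF=1; the adde below produces that carry directly in bit 4 of r5.
+ */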
+_eval_af: rlwinm r3,flags,0,INCDEC_FIELD
+ clrlwi r5,op2,28 # 4 LSB of op2
+ addc r4,flags,flags # carry_in
+ GET_ADDSUB(r6)
+ cntlzw r3,r3 # if inc/dec 16..23 else 32
+ neg r6,r6 # add/sub
+ clrlwi r4,r3,31 # if dec 1 else 0
+ xor r5,r5,r6 # conditionally complement
+ clrlwi r6,op1,28 # 4 LSB of op1
+ add r4,op2,r4 # op2+(dec ? 1 : 0)
+ clrlwi r4,r4,28 # 4 LSB of op2+(dec ? 1 : 0)
+ adde r5,r6,r5 # op1+cy_in+(op2/~op2)
+ cntlzw r4,r4 # 28..31 if not AF, 32 if set
+ andc r5,r5,r3 # masked AF from add/sub...
+ andc r4,r3,r4 # masked AF from inc/dec
+ or r3,r4,r5
+ blr
+
+/*
+ * _eval_above will only be called if ABOVE_IN_CR is not set.
+ * On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1]
+ */
+_eval_above: andis. r3,flags,ZF_IN_CR>>16
+ mflr r0
+ beql+ _eval_sf_zf
+ andis. r3,flags,CF_IN_CR>>16
+ beql+ _eval_cf
+ mtlr r0
+ oris flags,flags,ABOVE_IN_CR>>16
+ crnor ABOVE,ZF,CF
+ blr
+
+/* _eval_signed may only be called when signed_in_cr is clear ! */
+_eval_signed: andis. r3,flags,SF_IN_CR>>16
+ mflr r0
+ beql+ _eval_sf_zf
+/* SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear */
+ rlwinm. r3,flags,5,0,1
+ xoris flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16
+ bngl+ _eval_of
+ andis. r3,flags,OF_VALUE>>16
+ mtlr r0
+ crxor SLT,SF,OF
+ crnor SGT,SLT,ZF
+ blr
+
+_eval_flags: mflr r0
+ bl _eval_cf
+ li r7,2
+ rlwimi r7,r3,24,CF86,CF86 # 2 if CF clear, 3 if set
+ bl _eval_pf
+ andis. r4,flags,SF_IN_CR>>16
+ rlwimi r7,r3,32+PF-PF86,PF86,PF86
+ bl _eval_af
+ rlwimi r7,r3,0,AF86,AF86
+ beql+ _eval_sf_zf
+ mfcr r3
+ rlwinm. r4,flags,5,0,1
+ rlwimi r7,r3,0,DF86,SF86
+ ZF2ZF86(r3,r7)
+ bngl+ _eval_of
+ mtlr r0
+ lis r4,0x0004
+ lwz r3,eflags(state)
+ addi r4,r4,0x7000
+ rlwimi r7,flags,17,OF86,OF86
+ and r3,r3,r4
+ or r3,r3,r7
+ blr
+
+/* Quite simple for real mode, input in r4, returns in r3. */
+_segment_load: lwz r5,vbase(state)
+ rlwinm r3,r4,4,0xffff0 # segment selector * 16
+ add r3,r3,r5
+ blr
+
+/* To allow I/O port virtualization if necessary, code for exception in r3,
+port number in r4 */
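+/* The layout is presumably the same as the x86 TSS I/O permission bitmap:
+ * one bit per port, set bits denying direct access, and a 1/2/4 byte access
+ * needing 1, 2 or 4 consecutive clear bits (hence the 1, 3 or 0xf mask
+ * below). Any denied port escapes to the 'complex' handler with the
+ * exception code passed in r3.
+ */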
+_check_port: lwz r5,ioperm(state)
+ rlwinm r6,r4,29,0x1fff # 0 to 8kB
+ lis r0,0xffff
+ lhbrx r5,r5,r6
+ clrlwi r6,r4,29 # modulo 8
+ rlwnm r0,r0,r3,0x0f # 1, 3, or 0xf
+ slw r0,r0,r6
+ and. r0,r0,r5
+ bne- complex
+ blr
+/*
+ * Instructions are in approximate functional order:
+ * 1) move, exchange, lea, push/pop, pusha/popa
+ * 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out
+ * 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg
+ * 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr
+ * 5) jump, call, ret
+ * 6) string instructions and xlat
+ * 7) rotate/shift/mul/div
+ * 8) segment register, far jumps, calls and rets, interrupts
+ * 9) miscellaneous (flags, bcd,...)
+ */
+
+#define MEM offset,base
+#define REG opreg,state
+#define SELECTORS 32
+#define SELBASES 64
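+
+/* These expand to (index,base) register pairs for the indexed PowerPC loads
+and stores: e.g. 'lbzx r0,MEM' below is lbzx r0,offset,base (an x86 memory
+operand) and 'stbx r3,REG' is stbx r3,opreg,state (a register slot in the
+state block). */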
+
+/* Immediate moves */
+movb_imm_reg: rlwinm opreg,opcode,2,28,29; lbz r3,1(eip)
+ rlwimi opreg,opcode,30,31,31; lbzu opcode,2(eip)
+ stbx r3,REG; GOTNEXT
+
+movw_imm_reg: lhz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,3(eip)
+ sthx r3,REG; GOTNEXT
+
+movl_imm_reg: lwz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,5(eip)
+ stwx r3,REG; GOTNEXT
+
+movb_imm_mem: lbz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,2(eip); bne- ud
+ stbx r0,MEM; GOTNEXT
+
+movw_imm_mem: lhz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,3(eip); bne- ud
+ sthx r0,MEM; GOTNEXT
+
+movl_imm_mem: lwz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,5(eip); bne- ud
+ stwx r0,MEM; GOTNEXT
+
+/* The special short form moves between memory and al/ax/eax */
+movb_al_a32: lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip)
+ stbx r0,MEM; GOTNEXT
+
+movb_al_a16: lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip)
+ stbx r0,MEM; GOTNEXT
+
+movw_ax_a32: lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip)
+ sthx r0,MEM; GOTNEXT
+
+movw_ax_a16: lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip)
+ sthx r0,MEM; GOTNEXT
+
+movl_eax_a32: lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip)
+ stwx r0,MEM; GOTNEXT
+
+movl_eax_a16: lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip)
+ stwx r0,MEM; GOTNEXT
+
+movb_a32_al: lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM
+ stb r0,AL(state); GOTNEXT
+
+movb_a16_al: lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM
+ stb r0,AL(state); GOTNEXT
+
+movw_a32_ax: lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM
+ sth r0,AX(state); GOTNEXT
+
+movw_a16_ax: lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM
+ sth r0,AX(state); GOTNEXT
+
+movl_a32_eax: lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM
+ stw r0,EAX(state); GOTNEXT
+
+movl_a16_eax: lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM
+ stw r0,EAX(state); GOTNEXT
+
+/* General purpose move (all are exactly 4 instructions long) */
+ .align 4
+movb_reg_mem: lbzx r0,REG
+ NEXTBYTE(opcode)
+ stbx r0,MEM
+ GOTNEXT
+
+movw_reg_mem: lhzx r0,REG
+ NEXTBYTE(opcode)
+ sthx r0,MEM
+ GOTNEXT
+
+movl_reg_mem: lwzx r0,REG
+ NEXTBYTE(opcode)
+ stwx r0,MEM
+ GOTNEXT
+
+movb_mem_reg: lbzx r0,MEM
+ NEXTBYTE(opcode)
+ stbx r0,REG
+ GOTNEXT
+
+movw_mem_reg: lhzx r0,MEM
+ NEXTBYTE(opcode)
+ sthx r0,REG
+ GOTNEXT
+
+movl_mem_reg: lwzx r0,MEM
+ NEXTBYTE(opcode)
+ stwx r0,REG
+ GOTNEXT
+
+/* short form exchange ax/eax with register */
+xchgw_ax_reg: clrlslwi opreg,opcode,29,2
+ lhz r3,AX(state)
+ lhzx r4,REG
+ sthx r3,REG
+ sth r4,AX(state)
+ NEXT
+
+xchgl_eax_reg: clrlslwi opreg,opcode,29,2
+ lwz r3,EAX(state)
+ lwzx r4,REG
+ stwx r3,REG
+ stw r4,EAX(state)
+ NEXT
+
+/* General exchange (unlocked!) */
+xchgb_reg_mem: lbzx r3,MEM
+ lbzx r4,REG
+ NEXTBYTE(opcode)
+ stbx r3,REG
+ stbx r4,MEM
+ GOTNEXT
+
+xchgw_reg_mem: lhzx r3,MEM
+ lhzx r4,REG
+ sthx r3,REG
+ sthx r4,MEM
+ NEXT
+
+xchgl_reg_mem: lwzx r3,MEM
+ lwzx r4,REG
+ stwx r3,REG
+ stwx r4,MEM
+ NEXT
+
+/* lea, one of the simplest instructions */
+leaw: cmpw base,state
+ beq- ud
+ sthbrx offset,REG
+ NEXT
+
+leal: cmpw base,state
+ beq- ud
+ stwbrx offset,REG
+ NEXT
+
+/* Short form pushes and pops */
+pushw_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lhzx r0,REG
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ sthx r0,ssb,r4
+ NEXT
+
+pushl_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lwzx r0,REG
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ stwx r0,ssb,r4
+ NEXT
+
+popw_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lhzx r0,ssb,r4
+ addi r4,r4,2 # order is important in case of pop sp
+ sthbrx r4,state,r3
+ sthx r0,REG
+ NEXT
+
+popl_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lwzx r0,ssb,r4
+ addi r4,r4,4
+ sthbrx r4,state,r3
+ stwx r0,REG
+ NEXT
+
+/* Push immediate */
+pushw_sp_imm: li r3,SP
+ lhbrx r4,state,r3
+ lhz r0,1(eip)
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,3(eip)
+ sthx r0,ssb,r4
+ GOTNEXT
+
+pushl_sp_imm: li r3,SP
+ lhbrx r4,state,r3
+ lwz r0,1(eip)
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,5(eip)
+ stwx r0,ssb,r4
+ GOTNEXT
+
+pushw_sp_imm8: li r3,SP
+ lhbrx r4,state,r3
+	lbz	r0,1(eip)	# 8 bit immediate
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,2(eip)
+ extsb r0,r0
+	sthbrx	r0,ssb,r4	# store the value little endian
+ GOTNEXT
+
+pushl_sp_imm8: li r3,SP
+ lhbrx r4,state,r3
+	lbz	r0,1(eip)	# 8 bit immediate
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,2(eip)
+ extsb r0,r0
+	stwbrx	r0,ssb,r4	# store the value little endian
+ GOTNEXT
+
+/* General push/pop */
+pushw_sp: lhbrx r0,MEM
+ li r3,SP
+ lhbrx r4,state,r3
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ sthbrx r0,r4,ssb
+ NEXT
+
+pushl_sp: lwbrx r0,MEM
+ li r3,SP
+ lhbrx r4,state,r3
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ stwbrx r0,r4,ssb
+ NEXT
+
+/* pop is an exception with 32 bit addressing modes: the address may be
+computed incorrectly when esp is used as the base, since esp changes during
+the pop. But 16 bit addressing modes are safe. */
+
+popw_sp_a16: cmpw cr1,opreg,0 # first check the opcode
+ li r3,SP
+ lhbrx r4,state,r3
+ bne- cr1,ud
+ lhzx r0,ssb,r4
+ addi r4,r4,2
+ sthx r0,MEM
+ sthbrx r4,state,r3
+ NEXT
+
+popl_sp_a16: cmpw cr1,opreg,0
+ li r3,SP
+ lhbrx r4,state,r3
+ bne- cr1,ud
+ lwzx r0,ssb,r4
+	addi	r4,r4,4
+ stwx r0,MEM
+ sthbrx r4,state,r3
+ NEXT
+
+/* 32 bit addressing modes for pop not implemented for now. */
+ .equ popw_sp_a32,unimpl
+ .equ popl_sp_a32,unimpl
+
+/* pusha/popa */
+pushaw_sp: li r3,SP
+ li r0,8
+ lhbrx r4,r3,state
+ mtctr r0
+ addi r5,state,-4
+1: addi r4,r4,-2
+ lhzu r6,4(r5)
+ clrlwi r4,r4,16
+ sthx r6,ssb,r4
+ bdnz 1b
+ sthbrx r4,r3,state # new sp
+ NEXT
+
+pushal_sp: li r3,SP
+ li r0,8
+ lhbrx r4,r3,state
+ mtctr r0
+ addi r5,state,-4
+1: addi r4,r4,-4
+ lwzu r6,4(r5)
+ clrlwi r4,r4,16
+ stwx r6,ssb,r4
+ bdnz 1b
+ sthbrx r4,r3,state # new sp
+ NEXT
+
+popaw_sp: li r3,SP
+ li r0,8
+ lhbrx r4,state,r3
+ mtctr r0
+ addi r5,state,32
+1: lhzx r6,ssb,r4
+ addi r4,r4,2
+ sthu r6,-4(r5)
+ clrlwi r4,r4,16
+ bdnz 1b
+ sthbrx r4,r3,state # updated sp
+ NEXT
+
+popal_sp: li r3,SP
+ lis r0,0xef00 # mask to skip esp
+ lhbrx r4,state,r3
+ addi r5,state,32
+1: add. r0,r0,r0
+ lwzx r6,ssb,r4
+ addi r4,r4,4
+ stwu r6,-4(r5)
+ clrlwi r4,r4,16
+ blt 1b
+	addi	r5,r5,-4	# skip the esp slot in state
+ beq 2f
+ addi r4,r4,4
+ clrlwi r4,r4,16
+ b 1b
+2: sthbrx r4,state,r3 # updated sp
+ NEXT
+
+/* Moves with zero or sign extension: first the special cases */
+cbw: lbz r3,AL(state)
+ extsb r3,r3
+ sthbrx r3,AX,state
+ NEXT
+
+cwde: lhbrx r3,AX,state
+ extsh r3,r3
+ stwbrx r3,EAX,state
+ NEXT
+
+cwd: lbz r3,AH(state)
+ extsb r3,r3
+ srwi r3,r3,8 # get sign bits
+ sth r3,DX(state)
+ NEXT
+
+cdq: lwbrx r3,EAX,state
+ srawi r3,r3,31
+ stw r3,EDX(state) # byte order unimportant !
+ NEXT
+
+/* The moves with zero or sign extension are special since the source
+and destination are not the same size. The register describing the destination
+is modified to take this into account. */
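+
+/* Worked example: for movzbl with destination %esi the reg field is 6, which
+the byte-sized decode turned into 4*(6&3)+1 = 9. The rlwimi/rlwinm pair below
+copies bit 0 into bit 4 and masks, giving (9|0x10)&0x1c = 24 = 4*6, i.e. the
+offset of the full %esi slot in the state block. */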
+
+movsbw: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ extsb r3,r3
+ rlwinm opreg,opreg,0,0x1c
+ sthbrx r3,REG
+ NEXT
+
+movsbl: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ extsb r3,r3
+ rlwinm opreg,opreg,0,0x1c
+ stwbrx r3,REG
+ NEXT
+
+ .equ movsww, movw_mem_reg
+
+movswl: lhbrx r3,MEM
+ extsh r3,r3
+ stwbrx r3,REG
+ NEXT
+
+movzbw: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ rlwinm opreg,opreg,0,0x1c
+ sthbrx r3,REG
+ NEXT
+
+movzbl: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ rlwinm opreg,opreg,0,0x1c
+ stwbrx r3,REG
+ NEXT
+
+ .equ movzww, movw_mem_reg
+
+movzwl: lhbrx r3,MEM
+ stwbrx r3,REG
+ NEXT
+
+/* Byte swapping */
+bswap: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lwbrx r0,REG
+ stwx r0,REG
+ NEXT
+
+/* Input/output */
+inb_port_al: NEXTBYTE(r4)
+ b 1f
+inb_dx_al: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inb
+ bl _check_port
+ lwz r3,iobase(state)
+ lbzx r5,r4,r3
+ eieio
+ stb r5,AL(state)
+ NEXT
+
+inw_port_ax: NEXTBYTE(r4)
+ b 1f
+inw_dx_ax: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inw
+ bl _check_port
+ lwz r3,iobase(state)
+ lhzx r5,r4,r3
+ eieio
+ sth r5,AX(state)
+ NEXT
+
+inl_port_eax: NEXTBYTE(r4)
+ b 1f
+inl_dx_eax: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inl
+ bl _check_port
+ lwz r3,iobase(state)
+ lwzx r5,r4,r3
+ eieio
+ stw r5,EAX(state)
+ NEXT
+
+outb_al_port: NEXTBYTE(r4)
+ b 1f
+outb_al_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outb
+ bl _check_port
+ lwz r3,iobase(state)
+ lbz r5,AL(state)
+ stbx r5,r4,r3
+ eieio
+ NEXT
+
+outw_ax_port: NEXTBYTE(r4)
+ b 1f
+outw_ax_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outw
+ bl _check_port
+ lwz r3,iobase(state)
+ lhz r5,AX(state)
+ sthx r5,r4,r3
+ eieio
+ NEXT
+
+outl_eax_port: NEXTBYTE(r4)
+ b 1f
+outl_eax_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outl
+ bl _check_port
+	lwz	r3,iobase(state)
+ lwz r5,EAX(state)
+ stwx r5,r4,r3
+ eieio
+ NEXT
+
+/* Macro used for add and sub */
+#define ARITH(op,fl) \
+op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(fl(B)); lbzx op2,REG; \
+ op result,op1,op2; \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,REG; \
+ op result,op1,op2; \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,REG; \
+ op result,op1,op2; \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg: lbzx op2,MEM; SET_FLAGS(fl(B)); lbzx op1,REG; \
+ op result,op1,op2; \
+ stbx result,REG; NEXT; \
+op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(fl(W)); lhbrx op1,REG; \
+ op result,op1,op2; \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(fl(L)); lwbrx op1,REG; \
+ op result,op1,op2; \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: lbzx op1,MEM; SET_FLAGS(fl(B)); lbz op2,1(eip); \
+ op result,op1,op2; \
+ lbzu opcode,2(eip); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,eip,one; \
+ op result,op1,op2; \
+ lbzu opcode,3(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); SET_FLAGS(fl(W)); lhbrx op1,MEM; \
+ extsb op2,op2; clrlwi op2,op2,16; \
+ op result,op1,op2; \
+ lbzu opcode,2(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,5(eip); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8: lbz op2,1(eip); SET_FLAGS(fl(L)); lwbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ stwbrx result,MEM; GOTNEXT
+
+ ARITH(add, FLAGS_ADD)
+ ARITH(sub, FLAGS_SUB)
+
+#define adc(result, op1, op2) adde result,op1,op2
+#define sbb(result, op1, op2) subfe result,op2,op1
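+
+/* On PowerPC, adde is rD = rA+rB+ca and subfe rD,rA,rB is rD = rB+~rA+ca, so
+ * x86 adc maps directly while sbb needs ca to hold the complement of the x86
+ * carry; that is what the two carryfor routines below arrange in xer[ca].
+ */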
+
+#define ARITH_WITH_CARRY(op, fl) \
+op##b_reg_mem: lbzx op1,MEM; bl carryfor##op; lbzx op2,REG; \
+ ADD_FLAGS(fl(B)); op(result, op1, op2); \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: lhbrx op1,MEM; bl carryfor##op; lhbrx op2,REG; \
+ ADD_FLAGS(fl(W)); op(result, op1, op2); \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: lwbrx op1,MEM; bl carryfor##op; lwbrx op2,REG; \
+ ADD_FLAGS(fl(L)); op(result, op1, op2); \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg:	lbzx	op2,MEM; bl carryfor##op; lbzx op1,REG; \
+ ADD_FLAGS(fl(B)); op(result, op1, op2); \
+ stbx result,REG; NEXT; \
+op##w_mem_reg:	lhbrx	op2,MEM; bl carryfor##op; lhbrx op1,REG; \
+ ADD_FLAGS(fl(W)); op(result, op1, op2); \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg:	lwbrx	op2,MEM; bl carryfor##op; lwbrx op1,REG; \
+ ADD_FLAGS(fl(L)); op(result, op1, op2); \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: lbzx op1,MEM; bl carryfor##op; lbz op2,1(eip); \
+ ADD_FLAGS(fl(B)); lbzu opcode,2(eip); op(result, op1, op2); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: lhbrx op1,MEM; bl carryfor##op; lhbrx op2,eip,one; \
+ ADD_FLAGS(fl(W)); lbzu opcode,3(eip); op(result, op1, op2); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); bl carryfor##op; lhbrx op1,MEM; \
+ extsb op2,op2; ADD_FLAGS(fl(W)); clrlwi op2,op2,16; \
+ lbzu opcode,2(eip); op(result, op1, op2); \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: lwbrx op1,MEM; bl carryfor##op; lwbrx op2,eip,one; \
+ ADD_FLAGS(fl(L)); lbzu opcode,5(eip); op(result, op1, op2); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8:	lbz	op2,1(eip); bl carryfor##op; lwbrx op1,MEM; \
+	extsb	op2,op2; ADD_FLAGS(fl(L)); lbzu opcode,2(eip); \
+ op(result, op1, op2); \
+ stwbrx result,MEM; GOTNEXT
+
+carryforadc: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # get 8 or 16 bit carry
+ subfe r3,result,op1 # generate PPC carry for
+ CF_ROTCNT(r5) # preceding operation
+ addze r3,r4 # 32 bit carry in LSB
+ CF_POL(r4,23) # polarity
+ rlwnm r3,r3,r5,0x100 # shift carry there
+ xor flags,r4,r3 # CF86 ? 0x100 : 0
+ addic r4,r3,0xffffff00 # set xer[ca]
+ rlwinm flags,r3,23,CF_IN
+ blr
+
+ ARITH_WITH_CARRY(adc, FLAGS_ADD)
+
+/* for sbb the input carry must be the complement of the x86 carry */
+carryforsbb: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # 8/16 bit carry from result
+ subfe r3,result,op1
+ CF_ROTCNT(r5)
+ addze r3,r4
+ CF_POL(r4,23)
+ rlwnm r3,r3,r5,0x100
+ eqv flags,r4,r3 # CF86 ? 0xfffffeff:0xffffffff
+ addic r4,r3,1 # set xer[ca]
+ rlwinm flags,r3,23,CF_IN # keep only the carry
+ blr
+
+ ARITH_WITH_CARRY(sbb, FLAGS_SBB)
+
+cmpb_reg_mem: lbzx op1,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbzx op2,REG
+ extsb r3,op1
+ cmplw cr4,op1,op2
+ extsb r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpw_reg_mem: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op2,REG
+ extsh r3,op1
+ cmplw cr4,op1,op2
+ extsh r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpl_reg_mem: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op2,REG
+ cmplw cr4,op1,op2
+ sub result,op1,op2
+ cmpw cr6,op1,op2
+ NEXT
+
+cmpb_mem_reg: lbzx op2,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbzx op1,REG
+ extsb r4,op2
+ cmplw cr4,op1,op2
+ extsb r3,op1
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpw_mem_reg: lhbrx op2,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op1,REG
+ extsh r4,op2
+ cmplw cr4,op1,op2
+ extsh r3,op1
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpl_mem_reg: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op1,REG
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ NEXT
+
+cmpb_imm_al: addi base,state,0
+ li offset,AL
+cmpb_imm: lbzx op1,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbz op2,1(eip)
+ extsb r3,op1
+ cmplw cr4,op1,op2
+ lbzu opcode,2(eip)
+ extsb r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ GOTNEXT
+
+cmpw_imm_ax: addi base,state,0
+ li offset,AX
+cmpw_imm: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op2,eip,one
+ extsh r3,op1
+ cmplw cr4,op1,op2
+ lbzu opcode,3(eip)
+ extsh r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ GOTNEXT
+
+cmpw_imm8: lbz op2,1(eip)
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op1,MEM
+ extsb r4,op2
+ extsh r3,op1
+ lbzu opcode,2(eip)
+ clrlwi op2,r4,16
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+
+cmpl_imm_eax: addi base,state,0
+ li offset,EAX
+cmpl_imm: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op2,eip,one
+ cmpw cr6,op1,op2
+ lbzu opcode,5(eip)
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+
+cmpl_imm8: lbz op2,1(eip)
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op1,MEM
+ extsb op2,op2
+ lbzu opcode,2(eip)
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+
+/* Increment and decrement */
+incb: lbzx op2,MEM
+ INC_FLAGS(B)
+ addi op2,op2,1
+ stbx op2,MEM
+ NEXT
+
+incw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lhbrx op2,REG
+ INC_FLAGS(W)
+ addi op2,op2,1
+ sthbrx op2,REG
+ NEXT
+
+incw: lhbrx op2,MEM
+ INC_FLAGS(W)
+ addi op2,op2,1
+ sthbrx op2,MEM
+ NEXT
+
+incl_reg: clrlslwi opreg,opcode,29,2
+ lwbrx op2,REG
+ INC_FLAGS(L)
+ addi op2,op2,1
+	stwbrx	op2,REG
+ NEXT
+
+incl: lwbrx op2,MEM
+ INC_FLAGS(L)
+ addi op2,op2,1
+ stwbrx op2,MEM
+ NEXT
+
+decb: lbzx op2,MEM
+ DEC_FLAGS(B)
+ addi op2,op2,-1
+ stbx op2,MEM
+ NEXT
+
+decw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lhbrx op2,REG
+ DEC_FLAGS(W)
+ addi op2,op2,-1
+ sthbrx op2,REG
+ NEXT
+
+decw: lhbrx op2,MEM
+ DEC_FLAGS(W)
+ addi op2,op2,-1
+ sthbrx op2,MEM
+ NEXT
+
+decl_reg: clrlslwi opreg,opcode,29,2
+ lwbrx op2,REG
+ DEC_FLAGS(L)
+ addi op2,op2,-1
+	stwbrx	op2,REG
+ NEXT
+
+decl: lwbrx op2,MEM
+ DEC_FLAGS(L)
+ addi op2,op2,-1
+ stwbrx op2,MEM
+ NEXT
+
+negb: lbzx op2,MEM
+ SET_FLAGS(FLAGS_SUB(B))
+ neg result,op2
+ li op1,0
+ stbx result,MEM
+ NEXT
+
+negw: lhbrx op2,MEM
+ SET_FLAGS(FLAGS_SUB(W))
+ neg result,op2
+ li op1,0
+	sthbrx	result,MEM
+ NEXT
+
+negl: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_SUB(L))
+ subfic result,op2,0
+ li op1,0
+ stwbrx result,MEM
+ NEXT
+
+/* Macro used to generate code for OR/AND/XOR */
+#define LOGICAL(op) \
+op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
+ op result,op1,op2; \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,REG; \
+ op result,op1,op2; \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,REG; \
+ op result,op1,op2; \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
+ op result,op1,op2; \
+ stbx result,REG; NEXT; \
+op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,REG; \
+ op result,op1,op2; \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,REG; \
+ op result,op1,op2; \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbz op2,1(eip); \
+ op result,op1,op2; lbzu opcode,2(eip); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,3(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,5(eip); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ stwbrx result,MEM; GOTNEXT
+
+ LOGICAL(or)
+
+ LOGICAL(and)
+
+ LOGICAL(xor)
+
+testb_reg_mem: lbzx op1,MEM
+ SET_FLAGS(FLAGS_TEST(B))
+ lbzx op2,REG
+ and result,op1,op2
+ extsb r3,result
+ cmpwi cr6,r3,0
+ NEXT
+
+testw_reg_mem: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(W))
+ lhbrx op2,REG
+ and result,op1,op2
+ extsh r3,result
+ cmpwi cr6,r3,0
+ NEXT
+
+testl_reg_mem:	lwbrx	op1,MEM
+	SET_FLAGS(FLAGS_TEST(L))
+	lwbrx	op2,REG
+ and result,op1,op2
+ cmpwi cr6,result,0
+ NEXT
+
+testb_imm_al: addi base,state,0
+ li offset,AL
+testb_imm: lbzx op1,MEM
+ SET_FLAGS(FLAGS_TEST(B))
+ lbz op2,1(eip)
+ and result,op1,op2
+ lbzu opcode,2(eip)
+ extsb r3,result
+ cmpwi cr6,r3,0
+ GOTNEXT
+
+testw_imm_ax: addi base,state,0
+ li offset,AX
+testw_imm: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(W))
+ lhbrx op2,eip,one
+ and result,op1,op2
+ lbzu opcode,3(eip)
+ extsh r3,result
+ cmpwi cr6,r3,0
+ GOTNEXT
+
+testl_imm_eax: addi base,state,0
+ li offset,EAX
+testl_imm: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(L))
+ lwbrx op2,eip,one
+	and	result,op1,op2
+ lbzu opcode,5(eip)
+ cmpwi cr6,result,0
+ GOTNEXT
+
+/* Not does not affect flags */
+notb: lbzx r3,MEM
+ xori r3,r3,255
+ stbx r3,MEM
+ NEXT
+
+notw: lhzx r3,MEM
+ xori r3,r3,65535
+ sthx r3,MEM
+ NEXT
+
+notl: lwzx r3,MEM
+ not r3,r3
+ stwx r3,MEM
+ NEXT
+
+boundw: lhbrx r4,REG
+ li r3,code_bound
+ lhbrx r5,MEM
+ addi offset,offset,2
+ extsh r4,r4
+ lhbrx r6,MEM
+ extsh r5,r5
+ cmpw r4,r5
+ extsh r6,r6
+ blt- complex
+ cmpw r4,r6
+ ble+ nop
+ b complex
+
+boundl: lwbrx r4,REG
+ li r3,code_bound
+ lwbrx r5,MEM
+ addi offset,offset,4
+ lwbrx r6,MEM
+ cmpw r4,r5
+ blt- complex
+ cmpw r4,r6
+ ble+ nop
+ b complex
+
+/* Bit test and modify instructions */
+
+/* Common routine: bit index in op2, returns memory value in r3, mask in op2,
+and the AND of mask and value in op1. The CF flag is set as with a 32 bit add
+when the bit is non zero, since result (which is cleared) will be less than
+op1; it is also available in cr4. All other flags are undefined according to
+the Intel doc. Here OF and SF are cleared and ZF is set as a side effect of
+result being cleared. */
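+
+/* Worked example: bt $5,mem loads the 16 bit word into r3, builds
+op2 = 1<<5 = 0x20 and op1 = r3&0x20. With result = 0, the unsigned compare
+result<op1 in cr4 is exactly 'the tested bit was set', which is where the
+jc/jnc evaluation expects CF. */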
+_setup_bitw: cmpw base,state
+ SET_FLAGS(FLAGS_BTEST)
+ extsh op2,op2
+ beq- 1f
+	srawi	r4,op2,4
+	slwi	r4,r4,1		# scale the halfword index to a byte offset
+	add	offset,offset,r4
+1: clrlwi op2,op2,28 # true bit index
+ lhbrx r3,MEM
+ slw op2,one,op2 # build mask
+ li result,0 # implicitly sets CF
+ and op1,r3,op2 # if result<op1
+ cmplw cr4,result,op1 # sets CF in cr4
+ blr
+
+_setup_bitl: cmpw base,state
+ SET_FLAGS(FLAGS_BTEST)
+ beq- 1f
+	srawi	r4,op2,5
+	slwi	r4,r4,2		# scale the dword index to a byte offset
+	add	offset,offset,r4
+1: lwbrx r3,MEM
+ rotlw op2,one,op2 # build mask
+ li result,0
+ and op1,r3,op2
+ cmplw cr4,result,op1
+ blr
+
+/* Immediate form bit tests are not frequent since logical ops are often faster */
+btw_imm: NEXTBYTE(op2)
+ b 1f
+btw_reg_mem: lhbrx op2,REG
+1: bl _setup_bitw
+ NEXT
+
+btl_imm: NEXTBYTE(op2)
+ b 1f
+btl_reg_mem: lhbrx op2,REG
+1: bl _setup_bitl
+ NEXT
+
+btcw_imm: NEXTBYTE(op2)
+ b 1f
+btcw_reg_mem: lhbrx op2,REG
+1: bl _setup_bitw
+ xor r3,r3,op2
+ sthbrx r3,MEM
+ NEXT
+
+btcl_imm: NEXTBYTE(op2)
+ b 1f
+btcl_reg_mem: lhbrx op2,REG
+1: bl _setup_bitl
+ xor r3,r3,op2
+	stwbrx	r3,MEM
+ NEXT
+
+btrw_imm: NEXTBYTE(op2)
+ b 1f
+btrw_reg_mem: lhbrx op2,REG
+1: bl _setup_bitw
+ andc r3,r3,op2
+ sthbrx r3,MEM
+ NEXT
+
+btrl_imm: NEXTBYTE(op2)
+ b 1f
+btrl_reg_mem: lhbrx op2,REG
+1: bl _setup_bitl
+ andc r3,r3,op2
+ stwbrx r3,MEM
+ NEXT
+
+btsw_imm: NEXTBYTE(op2)
+ b 1f
+btsw_reg_mem: lhbrx op2,REG
+1: bl _setup_bitw
+ or r3,r3,op2
+ sthbrx r3,MEM
+ NEXT
+
+btsl_imm: NEXTBYTE(op2)
+ b 1f
+btsl_reg_mem: lhbrx op2,REG
+1: bl _setup_bitl
+ or r3,r3,op2
+ stwbrx r3,MEM
+ NEXT
+
+/* Bit string search instructions: only ZF is defined after these, and the
+result value is not defined when the bit field is zero. */
+bsfw: lhbrx result,MEM
+ SET_FLAGS(FLAGS_BSRCH(W))
+ neg r3,result
+ cmpwi cr6,result,0 # sets ZF
+ and r3,r3,result # keep only LSB
+ cntlzw r3,r3
+ subfic r3,r3,31
+ sthbrx r3,REG
+ NEXT
+
+bsfl: lwbrx result,MEM
+ SET_FLAGS(FLAGS_BSRCH(L))
+ neg r3,result
+ cmpwi cr6,result,0 # sets ZF
+ and r3,r3,result # keep only LSB
+ cntlzw r3,r3
+ subfic r3,r3,31
+ stwbrx r3,REG
+ NEXT
+
+bsrw: lhbrx result,MEM
+ SET_FLAGS(FLAGS_BSRCH(W))
+ cntlzw r3,result
+ cmpwi cr6,result,0
+ subfic r3,r3,31
+ sthbrx r3,REG
+ NEXT
+
+bsrl: lwbrx result,MEM
+ SET_FLAGS(FLAGS_BSRCH(L))
+ cntlzw r3,result
+ cmpwi cr6,result,0
+ subfic r3,r3,31
+ stwbrx r3,REG
+ NEXT
+
+/* Unconditional jumps: first the indirect, then the relative */
+jmpw: lhbrx eip,MEM
+ lbzux opcode,eip,csb
+ GOTNEXT
+
+jmpl: lwbrx eip,MEM
+ lbzux opcode,eip,csb
+ GOTNEXT
+
+sjmp_w: lbz r3,1(eip)
+ sub eip,eip,csb
+ addi eip,eip,2 # EIP after instruction
+ extsb r3,r3
+ add eip,eip,r3
+	clrlwi	eip,eip,16	# modulo 64k
+ lbzux opcode,eip,csb
+ GOTNEXT
+
+jmp_w: lhbrx r3,eip,one # eip now off by 3
+ sub eip,eip,csb
+ addi r3,r3,3 # compensate
+ add eip,eip,r3
+ clrlwi eip,eip,16
+ lbzux opcode,eip,csb
+ GOTNEXT
+
+sjmp_l: lbz r3,1(eip)
+ addi eip,eip,2
+ extsb r3,r3
+ lbzux opcode,eip,r3
+ GOTNEXT
+
+jmp_l: lwbrx r3,eip,one # Simple
+ addi eip,eip,5
+ lbzux opcode,eip,r3
+ GOTNEXT
+
+/* The conditional jumps: although it should not happen,
+byte relative jumps (sjmp) may wrap around in 16 bit mode */
+
+#define NOTTAKEN_S lbzu opcode,2(eip); GOTNEXT
+#define NOTTAKEN_W lbzu opcode,3(eip); GOTNEXT
+#define NOTTAKEN_L lbzu opcode,5(eip); GOTNEXT
+
+#define CONDJMP(cond, eval, flag) \
+sj##cond##_w: EVAL_##eval; bt flag,sjmp_w; NOTTAKEN_S; \
+j##cond##_w: EVAL_##eval; bt flag,jmp_w; NOTTAKEN_W; \
+sj##cond##_l: EVAL_##eval; bt flag,sjmp_l; NOTTAKEN_S; \
+j##cond##_l: EVAL_##eval; bt flag,jmp_l; NOTTAKEN_L; \
+sjn##cond##_w: EVAL_##eval; bf flag,sjmp_w; NOTTAKEN_S; \
+jn##cond##_w: EVAL_##eval; bf flag,jmp_w; NOTTAKEN_W; \
+sjn##cond##_l: EVAL_##eval; bf flag,sjmp_l; NOTTAKEN_S; \
+jn##cond##_l: EVAL_##eval; bf flag,jmp_l; NOTTAKEN_L
+
+ CONDJMP(o, OF, OF)
+ CONDJMP(c, CF, CF)
+ CONDJMP(z, ZF, ZF)
+ CONDJMP(a, ABOVE, ABOVE)
+ CONDJMP(s, SF, SF)
+ CONDJMP(p, PF, PF)
+ CONDJMP(g, SIGNED, SGT)
+ CONDJMP(l, SIGNED, SLT)
+
+jcxz_w: lhz r3,CX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
+jcxz_l: lhz r3,CX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
+jecxz_w: lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
+jecxz_l: lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
+
+/* Note that loop is somewhat strange: the data size attribute gives
+the size of eip, and the address size gives whether the counter is cx or ecx.
+This is the same for jcxz/jecxz. */
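+
+/* So in 16 bit code, a plain e2/xx is loopw_w; an 0x67 address size prefix
+selects the ecx counter (loopl_w), an 0x66 operand size prefix a 32 bit eip
+(loopw_l), and both prefixes give loopl_l. */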
+
+loopw_w: li opreg,CX
+ lhbrx r0,REG
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bne+ sjmp_w
+ NOTTAKEN_S
+
+loopl_w: li opreg,ECX
+ lwbrx r0,REG
+ sub. r0,r0,one
+ stwbrx r0,REG
+ bne+ sjmp_w
+ NOTTAKEN_S
+
+loopw_l: li opreg,CX
+ lhbrx r0,REG
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bne+ sjmp_l
+ NOTTAKEN_S
+
+loopl_l: li opreg,ECX
+ lwbrx r0,REG
+ sub. r0,r0,one
+ stwbrx r0,REG
+ bne+ sjmp_l
+ NOTTAKEN_S
+
+loopzw_w: li opreg,CX
+ lhbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bf ZF,1f
+ bne+ sjmp_w
+1: NOTTAKEN_S
+
+loopzl_w: li opreg,ECX
+ lwbrx r0,REG
+ EVAL_ZF
+	sub.	r0,r0,one
+	stwbrx	r0,REG
+ bf ZF,1f
+ bne+ sjmp_w
+1: NOTTAKEN_S
+
+loopzw_l: li opreg,CX
+ lhbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bf ZF,1f
+ bne+ sjmp_l
+1: NOTTAKEN_S
+
+loopzl_l: li opreg,ECX
+ lwbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ stwbrx r0,REG
+ bf ZF,1f
+ bne+ sjmp_l
+1: NOTTAKEN_S
+
+loopnzw_w: li opreg,CX
+ lhbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bt ZF,1f
+ bne+ sjmp_w
+1: NOTTAKEN_S
+
+loopnzl_w: li opreg,ECX
+ lwbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ stwbrx r0,REG
+ bt ZF,1f
+ bne+ sjmp_w
+1: NOTTAKEN_S
+
+loopnzw_l: li opreg,CX
+ lhbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ sthbrx r0,REG
+ bt ZF,1f
+ bne+ sjmp_l
+1: NOTTAKEN_S
+
+loopnzl_l: li opreg,ECX
+ lwbrx r0,REG
+ EVAL_ZF
+ sub. r0,r0,one
+ stwbrx r0,REG
+ bt ZF,1f
+ bne+ sjmp_l
+1: NOTTAKEN_S
+
+/* Memory indirect calls are rare enough to limit code duplication */
+callw_sp_mem: lhbrx r3,MEM
+ sub r4,eip,csb
+ addi r4,r4,1 # r4 is now return address
+ b 1f
+ .equ calll_sp_mem, unimpl
+
+callw_sp: lhbrx r3,eip,one
+ sub r4,eip,csb
+ addi r4,r4,3 # r4 is return address
+ add r3,r4,r3
+1: clrlwi eip,r3,16
+ li r5,SP
+ lhbrx r6,state,r5 # get sp
+ addi r6,r6,-2
+ lbzux opcode,eip,csb
+ sthbrx r6,state,r5 # update sp
+ clrlwi r6,r6,16
+ sthbrx r4,ssb,r6 # push return address
+ GOTNEXT
+ .equ calll_sp, unimpl
+
+retw_sp_imm: li opreg,SP
+ lhbrx r4,REG
+ lhbrx r6,eip,one
+ addi r5,r4,2
+ lhbrx eip,ssb,r4
+ lbzux opcode,eip,csb
+ add r5,r5,r6
+ sthbrx r5,REG
+ GOTNEXT
+
+ .equ retl_sp_imm, unimpl
+
+retw_sp: li opreg,SP
+ lhbrx r4,REG
+ addi r5,r4,2
+ lhbrx eip,ssb,r4
+ lbzux opcode,eip,csb
+ sthbrx r5,REG
+ GOTNEXT
+
+ .equ retl_sp, unimpl
+
+/* Enter is a mess, and its description in Intel documents is actually wrong
+ * in most revisions (all the PPro/PII manuals I have, but the old Pentium
+ * one is Ok) !
+ */
+
+enterw_sp: lhbrx r0,eip,one # Stack space to allocate
+ li opreg,SP
+ lhbrx r3,REG # SP
+ li r7,BP
+ lbzu r4,3(eip) # nesting level
+ addi r3,r3,-2
+ lhbrx r5,state,r7 # Original BP
+ clrlwi r3,r3,16
+ sthbrx r5,ssb,r3 # Push BP
+ andi. r4,r4,31 # modulo 32 and test
+ mr r6,r3 # Save frame pointer to temp
+ beq 3f
+ mtctr r4 # iterate level-1 times
+ b 2f
+1: addi r5,r5,-2 # copy list of frame pointers
+ clrlwi r5,r5,16
+ lhzx r4,ssb,r5
+ addi r3,r3,-2
+ clrlwi r3,r3,16
+ sthx r4,ssb,r3
+2: bdnz 1b
+ addi r3,r3,-2 # save current frame pointer
+ clrlwi r3,r3,16
+ sthbrx r6,ssb,r3
+3: sthbrx r6,state,r7 # New BP
+ sub r3,r3,r0
+ sthbrx r3,REG # Save new stack pointer
+ NEXT
+
+ .equ enterl_sp, unimpl
+
+leavew_sp: li opreg,BP
+ lhbrx r3,REG # Stack = BP
+ addi r4,r3,2 #
+ lhzx r3,ssb,r3
+ li opreg,SP
+ sthbrx r4,REG # New Stack
+ sth r3,BP(state) # Popped BP
+ NEXT
+
+ .equ leavel_sp, unimpl
+
+/* String instructions: first a generic setup routine, which exits early
+if there is a repeat prefix with a count of 0 */
+#define STRINGSRC base,offset
+#define STRINGDST esb,opreg
+
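+/* The repeat prefixes were folded into bits 0x1800 of 'opcode' by the
+_repnz/_repz handlers above; the rlwinm. below rotates them to the two MSBs
+so that cr0 encodes lt=repnz, gt=repz, eq=no prefix for the whole routine and
+its callers. */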
+_setup_stringw: li offset,SI #
+ rlwinm. r3,opcode,19,0,1 # lt=repnz, gt= repz, eq none
+ li opreg,DI
+ lhbrx offset,state,offset # load si
+ li r3,1 # no repeat
+ lhbrx opreg,state,opreg # load di
+ beq 1f # no repeat
+ li r3,CX
+ lhbrx r3,state,r3 # load CX
+ cmpwi r3,0
+ beq nop # early exit here !
+1: mtctr r3 # ctr=CX or 1
+ li r7,1 # stride
+ bflr+ DF
+ li r7,-1 # change stride sign
+ blr
+
+/* Ending routine to update all changed registers (goes directly to NEXT) */
+_finish_strw: li r4,SI
+ sthbrx offset,state,r4 # update si
+ li r4,DI
+ sthbrx opreg,state,r4 # update di
+ beq nop
+ mfctr r3
+ li r4,CX
+ sthbrx r3,state,r4 # update cx
+ NEXT
+
+lodsb_a16: bl _setup_stringw
+1: lbzx r0,STRINGSRC # [rep] lodsb
+ add offset,offset,r7
+ clrlwi offset,offset,16
+ bdnz 1b
+ stb r0,AL(state)
+ b _finish_strw
+
+lodsw_a16: bl _setup_stringw
+ slwi r7,r7,1
+1: lhzx r0,STRINGSRC # [rep] lodsw
+ add offset,offset,r7
+ clrlwi offset,offset,16
+ bdnz 1b
+ sth r0,AX(state)
+ b _finish_strw
+
+lodsl_a16: bl _setup_stringw
+ slwi r7,r7,2
+1: lwzx r0,STRINGSRC # [rep] lodsl
+ add offset,offset,r7
+ clrlwi offset,offset,16
+ bdnz 1b
+ stw r0,EAX(state)
+ b _finish_strw
+
+stosb_a16: bl _setup_stringw
+ lbz r0,AL(state)
+1: stbx r0,STRINGDST # [rep] stosb
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+stosw_a16: bl _setup_stringw
+ lhz r0,AX(state)
+ slwi r7,r7,1
+1: sthx r0,STRINGDST # [rep] stosw
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+stosl_a16: bl _setup_stringw
+ lwz r0,EAX(state)
+ slwi r7,r7,2
+1: stwx r0,STRINGDST # [rep] stosl
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+movsb_a16: bl _setup_stringw
+1: lbzx r0,STRINGSRC # [rep] movsb
+ add offset,offset,r7
+ stbx r0,STRINGDST
+ clrlwi offset,offset,16
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+movsw_a16: bl _setup_stringw
+ slwi r7,r7,1
+1: lhzx r0,STRINGSRC # [rep] movsw
+ add offset,offset,r7
+ sthx r0,STRINGDST
+ clrlwi offset,offset,16
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+movsl_a16: bl _setup_stringw
+ slwi r7,r7,2
+1: lwzx r0,STRINGSRC # [rep] movsl
+ add offset,offset,r7
+ stwx r0,STRINGDST
+ clrlwi offset,offset,16
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+/* At least on a Pentium, repeated string I/O instructions check for port
+access permission even if the count is 0 ! So the relative order of the two
+checks does not matter. */
+insb_a16: li r4,DX
+ li r3,code_insb_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz base,iobase(state)
+1: lbzx r0,base,r4 # [rep] insb
+ eieio
+ stbx r0,STRINGDST
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+insw_a16: li r4,DX
+ li r3,code_insw_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz base,iobase(state)
+ slwi r7,r7,1
+1: lhzx r0,base,r4 # [rep] insw
+ eieio
+ sthx r0,STRINGDST
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+insl_a16: li r4,DX
+ li r3,code_insl_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz base,iobase(state)
+ slwi r7,r7,2
+1: lwzx r0,base,r4 # [rep] insl
+ eieio
+ stwx r0,STRINGDST
+ add opreg,opreg,r7
+ clrlwi opreg,opreg,16
+ bdnz 1b
+ b _finish_strw
+
+outsb_a16: li r4,DX
+ li r3,code_outsb_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz r6,iobase(state)
+1: lbzx r0,STRINGSRC # [rep] outsb
+ add offset,offset,r7
+ stbx r0,r6,r4
+ clrlwi offset,offset,16
+ eieio
+ bdnz 1b
+ b _finish_strw
+
+outsw_a16: li r4,DX
+ li r3,code_outsw_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz r6,iobase(state)
+ slwi r7,r7,1
+1: lhzx r0,STRINGSRC # [rep] outsw
+ add offset,offset,r7
+ sthx r0,r6,r4
+ clrlwi offset,offset,16
+ eieio
+ bdnz 1b
+ b _finish_strw
+
+outsl_a16: li r4,DX
+ li r3,code_outsl_a16
+ lhbrx r4,state,r4
+ bl _check_port
+ bl _setup_stringw
+ lwz r6,iobase(state)
+ slwi r7,r7,2
+1: lwzx r0,STRINGSRC # [rep] outsl
+ add offset,offset,r7
+ stwx r0,r6,r4
+ clrlwi offset,offset,16
+ eieio
+ bdnz 1b
+ b _finish_strw
+
+cmpsb_a16: bl _setup_stringw
+ SET_FLAGS(FLAGS_CMP(B))
+ blt 3f # repnz prefix
+1: lbzx op1,STRINGSRC # [repz] cmpsb
+ add offset,offset,r7
+ lbzx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,1b
+2: extsb r3,op1
+ extsb r4,op2
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ b _finish_strw
+
+3: lbzx op1,STRINGSRC # repnz cmpsb
+ add offset,offset,r7
+ lbzx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,3b
+ b 2b
+
+cmpsw_a16: bl _setup_stringw
+ SET_FLAGS(FLAGS_CMP(W))
+ slwi r7,r7,1
+ blt 3f # repnz prefix
+1:	lhbrx	op1,STRINGSRC	# [repz] cmpsw
+ add offset,offset,r7
+ lhbrx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,1b
+2: extsh r3,op1
+ extsh r4,op2
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ b _finish_strw
+
+3: lhbrx op1,STRINGSRC # repnz cmpsw
+ add offset,offset,r7
+ lhbrx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,3b
+ b 2b
+
+cmpsl_a16: bl _setup_stringw
+ SET_FLAGS(FLAGS_CMP(L))
+ slwi r7,r7,2
+ blt 3f # repnz prefix
+1: lwbrx op1,STRINGSRC # [repz] cmpsl
+ add offset,offset,r7
+ lwbrx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,1b
+2: cmpw cr6,op1,op2
+ sub result,op1,op2
+ b _finish_strw
+
+3: lwbrx op1,STRINGSRC # repnz cmpsl
+ add offset,offset,r7
+ lwbrx op2,STRINGDST
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi offset,offset,16
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,3b
+ b 2b
+
+scasb_a16: bl _setup_stringw
+ lbzx op1,AL,state # AL
+ SET_FLAGS(FLAGS_CMP(B))
+ bgt 3f # repz prefix
+1: lbzx op2,STRINGDST # [repnz] scasb
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,1b
+2: extsb r3,op1
+ extsb r4,op2
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ b _finish_strw
+
+3: lbzx op2,STRINGDST # repz scasb
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,3b
+ b 2b
+
+scasw_a16: bl _setup_stringw
+ lhbrx op1,AX,state
+ SET_FLAGS(FLAGS_CMP(W))
+ slwi r7,r7,1
+ bgt 3f # repz prefix
+1: lhbrx op2,STRINGDST # [repnz] scasw
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,1b
+2: extsh r3,op1
+ extsh r4,op2
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ b _finish_strw
+
+3: lhbrx op2,STRINGDST # repz scasw
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,3b
+ b 2b
+
+scasl_a16: bl _setup_stringw
+ lwbrx op1,EAX,state
+ SET_FLAGS(FLAGS_CMP(L))
+ slwi r7,r7,2
+ bgt 3f # repz prefix
+1: lwbrx op2,STRINGDST # [repnz] scasl
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzf CF+2,1b
+2: cmpw cr6,op1,op2
+ sub result,op1,op2
+ b _finish_strw
+
+3: lwbrx op2,STRINGDST # repz scasl
+ add opreg,opreg,r7
+ cmplw cr4,op1,op2
+ clrlwi opreg,opreg,16
+ bdnzt CF+2,3b
+ b 2b
+
+ .equ lodsb_a32, unimpl
+ .equ lodsw_a32, unimpl
+ .equ lodsl_a32, unimpl
+ .equ stosb_a32, unimpl
+ .equ stosw_a32, unimpl
+ .equ stosl_a32, unimpl
+ .equ movsb_a32, unimpl
+ .equ movsw_a32, unimpl
+ .equ movsl_a32, unimpl
+ .equ insb_a32, unimpl
+ .equ insw_a32, unimpl
+ .equ insl_a32, unimpl
+ .equ outsb_a32, unimpl
+ .equ outsw_a32, unimpl
+ .equ outsl_a32, unimpl
+ .equ cmpsb_a32, unimpl
+ .equ cmpsw_a32, unimpl
+ .equ cmpsl_a32, unimpl
+ .equ scasb_a32, unimpl
+ .equ scasw_a32, unimpl
+ .equ scasl_a32, unimpl
+
+xlatb_a16: li offset,BX
+ lbz r3,AL(state)
+ lhbrx offset,offset,state
+ add r3,r3,base
+ lbzx r3,r3,offset
+ stb r3,AL(state)
+ NEXT
+
+ .equ xlatb_a32, unimpl
+
+/*
+ * Shifts and rotates: note the oddity that rotates do not affect SF/ZF/AF/PF
+ * but shifts do. Also testing has indicated that rotates with a count of zero
+ * do not affect any flag. The documentation specifies this for shifts but
+ * is more obscure for rotates. The overflow flag setting is only specified
+ * when the count is 1; otherwise OF is undefined, which simplifies emulation.
+ */
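+
+/* E.g. rolb $1 on 0x80 gives 0x01 with CF=1, and OF = CF xor MSB(result) =
+ * 1 xor 0 = 1; the rlwimi ...,OF_XOR lines below implement exactly that
+ * CF-xor-MSB rule, which only matters for a count of 1 since OF is undefined
+ * otherwise. */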
+
+/*
+ * The rotates through carry are among the most difficult instructions,
+ * they are implemented as a shift of 2*n+some bits depending on case.
+ * First the left rotates through carry.
+ */
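+
+/* The reduction modulo 9 below is branchless: with count in 0..31,
+ * count/9 == (count*29)>>8 because 29 = ceil(256/9), and
+ * rlwimi r5,r5,3,26,28 then forms 9*(count/9) as (count/9)<<3 + count/9,
+ * the two terms not overlapping since count/9 <= 3. The word forms reduce
+ * modulo 17 with a similar branchless sequence. */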
+
+/* Byte rcl is performed on 18 bits (17 actually used) in a single register */
+rclb_imm: NEXTBYTE(r3)
+ b 1f
+rclb_cl: lbz r3,CL(state)
+ b 1f
+rclb_1: li r3,1
+1: lbzx r0,MEM
+ andi. r3,r3,31 # count%32
+ addc r4,flags,flags # CF_IN->xer[ca]
+ RES2CF(r6)
+ subfe r4,result,op1
+ mulli r5,r3,29 # 29=ceil(256/9)
+ CF_ROTCNT(r7)
+ addze r6,r6
+ CF_POL_INSERT(r0,23)
+ srwi r5,r5,8 # count/9
+ rlwnm r6,r6,r7,0x100
+ xor r0,r0,r6 # (23)0:CF:data8
+ rlwimi r5,r5,3,26,28 # 9*(count/9)
+ rlwimi r0,r0,23,0,7 # CF:(data8):(14)0:CF:data8
+ sub r3,r3,r5 # count%9
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ rlwnm r0,r0,r3,0x000001ff # (23)0:NewCF:Result8
+ rlwimi flags,r0,19,CF_VALUE
+ stbx r0,MEM
+ rlwimi flags,r0,18,OF_XOR
+ NEXT
+
+/* Word rcl is performed on 33 bits (CF:data16:CF:(15 MSB of data16)) */
+rclw_imm: NEXTBYTE(r3)
+ b 1f
+rclw_cl: lbz r3,CL(state)
+ b 1f
+rclw_1: li r3,1
+1: lhbrx r0,MEM
+ andi. r3,r3,31 # count=count%32
+ addc r4,flags,flags
+ RES2CF(r6)
+ subfe r4,result,op1
+ addi r5,r3,15 # modulo 17: >=32 if >=17
+ CF_ROTCNT(r7)
+ addze r6,r6
+ addi r7,r7,8
+ CF_POL_INSERT(r0,15)
+ srwi r5,r5,5 # count/17
+ rlwnm r6,r6,r7,0x10000
+ rlwimi r5,r5,4,27,27 # 17*(count/17)
+ xor r0,r0,r6 # (15)0:CF:data16
+ sub r3,r3,r5 # count%17
+ rlwinm r4,r0,15,0xffff0000 # CF:(15 MSB of data16):(16)0
+ slw r0,r0,r3 # New carry and MSBs
+ rlwnm r4,r4,r3,16,31 # New LSBs
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ add r0,r0,r4 # result
+ rlwimi flags,r0,11,CF_VALUE
+ sthbrx r0,MEM
+ rlwimi flags,r0,10,OF_XOR
+ NEXT
+
+/* Longword rcl only needs 64 bits because the maximum rotate count is 31 ! */
+rcll_imm: NEXTBYTE(r3)
+ b 1f
+rcll_cl: lbz r3,CL(state)
+ b 1f
+rcll_1: li r3,1
+1: lwbrx r0,MEM
+ andi. r3,r3,31 # count=count%32
+ addc r4,r4,flags # ~XER[CA]
+ RES2CF(r6)
+ subfe r4,result,op1
+ CF_ROTCNT(r7)
+ addze r6,r6
+ srwi r4,r0,1 # 0:(31 MSB of data32)
+ addi r7,r7,23
+ CF_POL_INSERT(r4,0)
+ rlwnm r6,r6,r7,0,0
+ beq- nop # no flags changed if count 0
+ subfic r5,r3,32
+ xor r4,r4,r6
+ ROTATE_FLAGS
+ slw r0,r0,r3 # New MSBs
+ srw r5,r4,r5 # New LSBs
+ rlwnm r4,r4,r3,0,0 # New Carry
+ add r0,r0,r5 # result
+ rlwimi flags,r4,28,CF_VALUE
+ rlwimi flags,r0,27,OF_XOR
+ stwbrx r0,MEM
+ NEXT
+
+/* Right rotates through carry are even worse because PPC only has a left
+rotate instruction. Somewhat tough when combined with the modulo 9, 17, or
+33 operation and the rules of OF and CF flag settings. */
+/* Byte rcr is performed on 17 bits */
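+/* E.g. rcrb $1 on 0x01 with CF set: the 17 bit value data8:CF:data8 built
+below is 00000001:1:00000001; shifted right by 1, its low 9 bits are
+1:10000000, i.e. result 0x80 with the old bit 0 as the new CF. */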
+rcrb_imm: NEXTBYTE(r3)
+ b 1f
+rcrb_cl: lbz r3,CL(state)
+ b 1f
+rcrb_1: li r3,1
+1: lbzx r0,MEM
+ andi. r3,r3,31 # count%32
+ addc r4,flags,flags # cf_in->xer[ca]
+ RES2CF(r6)
+ mulli r5,r3,29 # 29=ceil(256/9)
+ subfe r4,result,op1
+ CF_ROTCNT(r7)
+ addze r6,r6
+ CF_POL_INSERT(r0,23)
+ srwi r5,r5,8 # count/9
+ rlwimi r0,r0,9,0x0001fe00 # (15)0:data8:0:data8
+ rlwnm r6,r6,r7,0x100
+ rlwimi r5,r5,3,26,28 # 9*(count/9)
+ xor r0,r0,r6 # (15)0:data8:CF:data8
+ sub r3,r3,r5 # count%9
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ srw r0,r0,r3 # (23)junk:NewCF:Result8
+ rlwimi flags,r0,19,CF_VALUE|OF_XOR
+ stbx r0,MEM
+ NEXT
+
+/* Word rcr is a 33 bit right shift with a quirk, because the 33rd bit
+is only needed when the rotate count is 16, and rotating a 32 bit quantity
+left or right by 16 is the same ! */
+rcrw_imm: NEXTBYTE(r3)
+ b 1f
+rcrw_cl: lbz r3,CL(state)
+ b 1f
+rcrw_1: li r3,1
+1: lhbrx r0,MEM
+ andi. r3,r3,31 # count%32
+ addc r4,flags,flags # cf_in->xer[ca]
+ RES2CF(r6)
+ subfe r4,result,op1
+ addi r5,r3,15 # >=32 if >=17
+ CF_ROTCNT(r7)
+ addze r6,r6
+ addi r7,r7,8
+ CF_POL_INSERT(r0,15)
+ srwi r5,r5,5 # count/17
+ rlwnm r6,r6,r7,0x10000
+ rlwinm r7,r0,16,0x01 # MSB of data16
+ rlwimi r0,r0,17,0xfffe0000 # (15 MSB of data16):0:data16
+ rlwimi r5,r5,4,27,27 # 17*(count/17)
+ xor r0,r0,r6 # (15 MSB of data16):CF:data16
+ sub r3,r3,r5 # count%17
+ beq- nop # no flags changed if count 0
+ srw r0,r0,r3 # shift right
+ rlwnm r7,r7,r3,0x10000 # just in case count=16
+ ROTATE_FLAGS
+ add r0,r0,r7 # junk15:NewCF:result16
+ rlwimi flags,r0,11,CF_VALUE|OF_XOR
+ sthbrx r0,MEM
+ NEXT
+
+/* Longword rcr needs only 64 bits since the rotate count is limited to 31 */
+rcrl_imm: NEXTBYTE(r3)
+ b 1f
+rcrl_cl: lbz r3,CL(state)
+ b 1f
+rcrl_1: li r3,1
+1: lwbrx r0,MEM
+ andi. r3,r3,31 # count%32
+ addc r4,flags,flags
+ RES2CF(r6)
+ subfe r4,result,op1
+ CF_ROTCNT(r7)
+ slwi r4,r0,1 # (31MSB of data32):0
+ addze r6,r6
+ addi r7,r7,24
+ CF_POL_INSERT(r4,31)
+ rlwnm r6,r6,r7,0x01
+ beq- nop # no flags changed if count 0
+ subfic r7,r3,32
+ xor r4,r4,r6
+ srw r0,r0,r3 # Result LSB
+ slw r5,r4,r7 # Result MSB
+ srw r4,r4,r3 # NewCF in LSB
+ add r0,r0,r5 # result
+ rlwimi flags,r4,27,CF_VALUE
+ stwbrx r0,MEM
+ rlwimi flags,r0,27,OF_XOR
+ NEXT
+
+/* After the rotates through carry, normal rotates are so simple ! */
+rolb_imm: NEXTBYTE(r3)
+ b 1f
+rolb_cl: lbz r3,CL(state)
+ b 1f
+rolb_1: li r3,1
+1: lbzx r0,MEM
+ andi. r4,r3,31 # count%32 == 0 ?
+ clrlwi r3,r3,29 # count%8
+ rlwimi r0,r0,24,0xff000000 # replicate for shift in
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ rotlw r0,r0,r3
+ rlwimi flags,r0,27,CF_VALUE # New CF
+ stbx r0,MEM
+ rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
+ NEXT
+
+rolw_imm: NEXTBYTE(r3)
+ b 1f
+rolw_cl: lbz r3,CL(state)
+ b 1f
+rolw_1: li r3,1
+1: lhbrx r0,MEM
+ andi. r3,r3,31
+ rlwimi r0,r0,16,0,15 # duplicate
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ rotlw r0,r0,r3 # result word duplicated
+ rlwimi flags,r0,27,CF_VALUE # New CF
+ sthbrx r0,MEM
+ rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
+ NEXT
+
+roll_imm: NEXTBYTE(r3)
+ b 1f
+roll_cl: lbz r3,CL(state)
+ b 1f
+roll_1: li r3,1
+1: lwbrx r0,MEM
+ andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ rotlw r0,r0,r3 # result
+ rlwimi flags,r0,27,CF_VALUE # New CF
+ stwbrx r0,MEM
+ rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB)
+ NEXT
+
+rorb_imm: NEXTBYTE(r3)
+ b 1f
+rorb_cl: lbz r3,CL(state)
+ b 1f
+rorb_1: li r3,1
+1: lbzx r0,MEM
+ andi. r4,r3,31 # count%32 == 0 ?
+ clrlwi r3,r3,29 # count%8
+ rlwimi r0,r0,8,0x0000ff00 # replicate for shift in
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ srw r0,r0,r3
+ rlwimi flags,r0,20,CF_VALUE
+ stbx r0,MEM
+ rlwimi flags,r0,19,OF_XOR
+ NEXT
+
+rorw_imm: NEXTBYTE(r3)
+ b 1f
+rorw_cl: lbz r3,CL(state)
+ b 1f
+rorw_1: li r3,1
+1: lhbrx r0,MEM
+ andi. r4,r3,31
+ clrlwi r3,r3,28 # count %16
+ rlwimi r0,r0,16,0xffff0000 # duplicate
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ srw r0,r0,r3 # junk16:result16
+ rlwimi flags,r0,12,CF_VALUE
+ sthbrx r0,MEM
+ rlwimi flags,r0,11,OF_XOR
+ NEXT
+
+rorl_imm: NEXTBYTE(r3)
+ b 1f
+rorl_cl: lbz r3,CL(state)
+ b 1f
+rorl_1: li r3,1
+1: lwbrx r0,MEM
+ andi. r4,r3,31
+ neg r3,r3
+ beq- nop # no flags changed if count 0
+ ROTATE_FLAGS
+ rotlw r0,r0,r3 # result
+ rlwimi flags,r0,28,CF_VALUE
+ stwbrx r0,MEM
+ rlwimi flags,r0,27,OF_XOR
+ NEXT
+
+/* Right arithmetic shifts: they clear OF whenever count!=0 */
+#define SAR_FLAGS CF_ZERO|OF_ZERO|RESL
+sarb_imm: NEXTBYTE(r3)
+ b 1f
+sarb_cl: lbz r3,CL(state)
+ b 1f
+sarb_1: li r3,1
+1: lbzx r4,MEM
+ andi. r3,r3,31
+ addi r5,r3,-1
+ extsb r4,r4
+ beq- nop # no flags changed if count 0
+ SET_FLAGS(SAR_FLAGS)
+ sraw result,r4,r3
+ srw r5,r4,r5
+ stbx result,MEM
+ rlwimi flags,r5,27,CF_VALUE
+ NEXT
+
+sarw_imm: NEXTBYTE(r3)
+ b 1f
+sarw_cl: lbz r3,CL(state)
+ b 1f
+sarw_1: li r3,1
+1: lhbrx r4,MEM
+ andi. r3,r3,31
+ addi r5,r3,-1
+ extsh r4,r4
+ beq- nop # no flags changed if count 0
+ SET_FLAGS(SAR_FLAGS)
+ sraw result,r4,r3
+ srw r5,r4,r5
+ sthbrx result,MEM
+ rlwimi flags,r5,27,CF_VALUE
+ NEXT
+
+sarl_imm: NEXTBYTE(r3)
+ b 1f
+sarl_cl: lbz r3,CL(state)
+ b 1f
+sarl_1: li r3,1
+1: lwbrx r4,MEM
+ andi. r3,r3,31
+ addi r5,r3,-1
+ beq- nop # no flags changed if count 0
+ SET_FLAGS(SAR_FLAGS)
+ sraw result,r4,r3
+ srw r5,r4,r5
+ stwbrx result,MEM
+ rlwimi flags,r5,27,CF_VALUE
+ NEXT
+
+/* Left shifts are quite easy: they use the flag mechanism of add */
+shlb_imm: NEXTBYTE(r3)
+ b 1f
+shlb_cl: lbz r3,CL(state)
+ b 1f
+shlb_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lbzx op1,MEM
+ SET_FLAGS(FLAGS_ADD(B))
+ slw result,op1,r3
+ addi op2,op1,0 # for OF computation only !
+ stbx result,MEM
+ NEXT
+
+shlw_imm: NEXTBYTE(r3)
+ b 1f
+shlw_cl: lbz r3,CL(state)
+ b 1f
+shlw_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lhbrx op1,MEM
+ SET_FLAGS(FLAGS_ADD(W))
+ slw result,op1,r3
+ addi op2,op1,0 # for OF computation only !
+ sthbrx result,MEM
+ NEXT
+
+/* That one may be wrong */
+shll_imm: NEXTBYTE(r3)
+ b 1f
+shll_cl: lbz r3,CL(state)
+ b 1f
+shll_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lwbrx op1,MEM
+ addi r4,r3,-1
+ SET_FLAGS(FLAGS_ADD(L))
+ slw result,op1,r3
+ addi op2,op1,0 # for OF computation only !
+ slw op1,op1,r4 # for CF computation
+ stwbrx result,MEM
+ NEXT
+
+/* Right shifts are quite complex, because of funny flag rules ! */
+shrb_imm: NEXTBYTE(r3)
+ b 1f
+shrb_cl: lbz r3,CL(state)
+ b 1f
+shrb_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lbzx op1,MEM
+ addi r4,r3,-1
+ SET_FLAGS(FLAGS_SHR(B))
+ srw result,op1,r3
+ srw r4,op1,r4
+ li op2,-1 # for OF computation only !
+ stbx result,MEM
+ rlwimi flags,r4,27,CF_VALUE # Set CF
+ NEXT
+
+shrw_imm: NEXTBYTE(r3)
+ b 1f
+shrw_cl: lbz r3,CL(state)
+ b 1f
+shrw_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lhbrx op1,MEM
+ addi r4,r3,-1
+ SET_FLAGS(FLAGS_SHR(W))
+ srw result,op1,r3
+ srw r4,op1,r4
+ li op2,-1 # for OF computation only !
+ sthbrx result,MEM
+ rlwimi flags,r4,27,CF_VALUE # Set CF
+ NEXT
+
+shrl_imm: NEXTBYTE(r3)
+ b 1f
+shrl_cl: lbz r3,CL(state)
+ b 1f
+shrl_1: li r3,1
+1: andi. r3,r3,31
+ beq- nop # no flags changed if count 0
+ lwbrx op1,MEM
+ addi r4,r3,-1
+ SET_FLAGS(FLAGS_SHR(L))
+ srw result,op1,r3
+ srw r4,op1,r4
+ li op2,-1 # for OF computation only !
+ stwbrx result,MEM
+ rlwimi flags,r4,27,CF_VALUE # Set CF
+ NEXT
+
+/* Double length shifts, shldw uses FLAGS_ADD for simplicity */
+shldw_imm: NEXTBYTE(r3)
+ b 1f
+shldw_cl: lbz r3,CL(state)
+1: andi. r3,r3,31
+ beq- nop
+ lhbrx op1,MEM
+ SET_FLAGS(FLAGS_ADD(W))
+ lhbrx op2,REG
+ rlwimi op1,op2,16,0,15 # op2:op1
+ addi op2,op1,0
+ rotlw result,op1,r3
+ sthbrx result,MEM
+ NEXT
+
+shldl_imm: NEXTBYTE(r3)
+ b 1f
+shldl_cl: lbz r3,CL(state)
+1: andi. r3,r3,31
+ beq- nop
+ lwbrx op1,MEM
+ SET_FLAGS(FLAGS_DBLSH(L))
+ lwbrx op2,REG
+ subfic r4,r3,32
+ slw result,op1,r3
+ srw r4,op2,r4
+ rotlw r3,op1,r3
+ or result,result,r4
+ addi op2,op1,0
+ rlwimi flags,r3,27,CF_VALUE
+ stwbrx result,MEM
+ NEXT
+
+shrdw_imm: NEXTBYTE(r3)
+ b 1f
+shrdw_cl: lbz r3,CL(state)
+1: andi. r3,r3,31
+ beq- nop
+ lhbrx op1,MEM
+ SET_FLAGS(FLAGS_DBLSH(W))
+ lhbrx op2,REG
+ addi r4,r3,-1
+ rlwimi op1,op2,16,0,15 # op2:op1
+ addi op2,op1,0
+ srw result,op1,r3
+ srw r4,op1,r4
+ sthbrx result,MEM
+ rlwimi flags,r4,27,CF_VALUE
+ NEXT
+
+shrdl_imm: NEXTBYTE(r3)
+ b 1f
+shrdl_cl: lbz r3,CL(state)
+1: andi. r3,r3,31
+ beq- nop
+ lwbrx op1,MEM
+ SET_FLAGS(FLAGS_DBLSH(L))
+ lwbrx op2,REG
+ subfic r4,r3,32
+ srw result,op1,r3
+ addi r3,r3,-1
+ slw r4,op2,r4
+ srw r3,op1,r3
+ or result,result,r4
+ addi op2,op1,0
+ rlwimi flags,r3,27,CF_VALUE
+ stwbrx result,MEM
+ NEXT
+
+/* One operand multiplies: with result double the operand size, unsigned */
+mulb: lbzx op2,MEM
+ lbz op1,AL(state)
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ subfic r3,result,255
+ sthbrx result,AX,state
+ rlwimi flags,r3,0,CF_VALUE|OF_VALUE
+ NEXT
+
+mulw: lhbrx op2,MEM
+ lhbrx op1,AX,state
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ li r4,DX
+ srwi r3,result,16
+ sthbrx result,AX,state
+ neg r5,r3
+ sthbrx r3,r4,state # DX
+ rlwimi flags,r5,0,CF_VALUE|OF_VALUE
+ NEXT
+
+mull: lwbrx op2,MEM
+ lwbrx op1,EAX,state
+ mullw result,op1,op2
+ mulhwu. r3,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ stwbrx result,EAX,state
+ li r4,EDX
+ stwbrx r3,r4,state
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT
+
+/* One operand multiplies: with result double the operand size, signed */
+imulb: lbzx op2,MEM
+ extsb op2,op2
+ lbz op1,AL(state)
+ extsb op1,op1
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ extsb r3,result
+ sthbrx result,AX,state
+ cmpw r3,result
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT
+
+imulw: lhbrx op2,MEM
+ extsh op2,op2
+ lhbrx op1,AX,state
+ extsh op1,op1
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ li r3,DX
+ extsh r4,result
+ srwi r5,result,16
+ sthbrx result,AX,state
+ cmpw r4,result
+ sthbrx r5,r3,state
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT
+
+imull: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_MUL)
+ lwbrx op1,EAX,state
+ li r3,EDX
+ mulhw r4,op1,op2
+ mullw result,op1,op2
+ stwbrx r4,r3,state
+ srawi r3,result,31
+ cmpw r3,r4
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT
+
+/* Other multiplies */
+imulw_mem_reg: lhbrx op2,REG
+ extsh op2,op2
+ b 1f
+
+imulw_imm: NEXTWORD(op2)
+ extsh op2,op2
+ b 1f
+
+imulw_imm8: NEXTBYTE(op2)
+ extsb op2,op2
+1: lhbrx op1,MEM
+ extsh op1,op1
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ extsh r3,result
+ sthbrx result,REG
+ cmpw r3,result
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT # SF/ZF/AF/PF undefined !
+
+imull_mem_reg: lwbrx op2,REG
+ b 1f
+
+imull_imm: NEXTDWORD(op2)
+ b 1f
+
+imull_imm8: NEXTBYTE(op2)
+ extsb op2,op2
+1: lwbrx op1,MEM
+ mullw result,op1,op2
+ SET_FLAGS(FLAGS_MUL)
+ mulhw r3,op1,op2
+ srawi r4,result,31
+ stwbrx result,REG
+ cmpw r3,r4
+ beq+ nop
+ oris flags,flags,(CF_SET|OF_SET)>>16
+ NEXT # SF/ZF/AF/PF undefined !
+
+/* aad is indeed a multiply */
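+/* x86 semantics, roughly: AL = (AH*imm8 + AL) & 0xff and AH = 0, with
+   SF/ZF/PF taken from the new AL. */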
+aad: NEXTBYTE(r3)
+ lbz op1,AH(state)
+ lbz op2,AL(state)
+ mullw result,op1,r3 # AH*imm
+ SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from result
+ add result,result,op2 # AH*imm+AL
+ slwi r3,result,8
+ sth r3,AX(state) # AH=0
+ NEXT # OF/AF/CF undefined
+
+/* Unsigned divides: we may destroy all flags */
+divb: lhbrx r4,AX,state
+ lbzx r3,MEM
+ srwi r5,r4,8
+ cmplw r5,r3
+ bnl- _divide_error
+ divwu r5,r4,r3
+ mullw r3,r5,r3
+ sub r3,r4,r3
+ stb r5,AL(state)
+ stb r3,AH(state)
+ NEXT
+
+divw: li opreg,DX
+ lhbrx r4,AX,state
+ lhbrx r5,REG
+ lhbrx r3,MEM
+ insrwi r4,r5,16,0
+ cmplw r5,r3
+ bnl- _divide_error
+ divwu r5,r4,r3
+ mullw r3,r5,r3
+ sub r3,r4,r3
+ sthbrx r5,AX,state
+ sthbrx r3,REG
+ NEXT
+
+divl: li opreg,EDX # Not yet fully implemented
+ lwbrx r3,MEM
+ lwbrx r4,REG
+ lwbrx r5,EAX,state
+ cmplw r4,r3
+ bnl- _divide_error
+ cmplwi r4,0
+ bne- 1f
+ divwu r4,r5,r3
+ mullw r3,r4,r3
+ stwbrx r4,EAX,state
+ sub r3,r5,r3
+ stwbrx r3,REG
+ NEXT
+/* full implementation of 64:32 unsigned divide, slow but rarely used */
+1: bl _div_64_32
+ stwbrx r5,EAX,state
+ stwbrx r4,REG
+ NEXT
+/*
+ * Divide r4:r5 by r3, quotient in r5, remainder in r4.
+ * The algorithm is stupid because it won't be used very often.
+ */
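+/* Roughly equivalent C, sketch only (hi/lo stand for r4/r5, d for r3,
+ * all unsigned):
+ *   for (i = 0; i < 32; i++) {
+ *     msb = hi >> 31;
+ *     hi = (hi << 1) | (lo >> 31); lo <<= 1;
+ *     if (msb || hi >= d) { hi -= d; lo |= 1; }
+ *   }
+ * The quotient ends up in lo (r5) and the remainder in hi (r4).
+ */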
+_div_64_32: li r7,32
+ mtctr r7
+1: cmpwi r4,0 # always subtract in case
+ addc r5,r5,r5 # MSB is set
+ adde r4,r4,r4
+ blt 2f
+ cmplw r4,r3
+ blt 3f
+2: sub r4,r4,r3
+ addi r5,r5,1
+3: bdnz 1b
+ blr # return: quotient in r5, remainder in r4
+
+/* Signed divides: we may destroy all flags */
+idivb: lbzx r3,MEM
+ lhbrx r4,AX,state
+ cmpwi r3,0
+ beq- _divide_error
+ divw r5,r4,r3
+ extsb r7,r5
+ mullw r3,r5,r3
+ cmpw r5,r7
+ sub r3,r4,r3
+ bne- _divide_error
+ stb r5,AL(state)
+ stb r3,AH(state)
+ NEXT
+
+idivw: li opreg,DX
+ lhbrx r4,AX,state
+ lhbrx r5,REG
+ lhbrx r3,MEM
+ insrwi r4,r5,16,0
+ cmpwi r3,0
+ beq- _divide_error
+ divw r5,r4,r3
+ extsh r7,r5
+ mullw r3,r5,r3
+ cmpw r5,r7
+ sub r3,r4,r3
+ bne- _divide_error
+ sthbrx r5,AX,state
+ sthbrx r3,REG
+ NEXT
+
+idivl: li opreg,EDX # Not yet fully implemented
+ lwbrx r3,MEM
+ lwbrx r5,EAX,state
+ cmpwi cr1,r3,0
+ lwbrx r4,REG
+ srwi r7,r5,31
+ beq- _divide_error
+ add. r7,r7,r4
+ bne- 1f # EDX not sign extension of EAX
+ divw r4,r5,r3
+ xoris r7,r5,0x8000 # only overflow case is
+ orc. r7,r7,r3 # 0x80000000 divided by -1
+ mullw r3,r4,r3
+ beq- _divide_error
+ stwbrx r4,EAX,state
+ sub r3,r5,r3
+ stwbrx r3,REG
+ NEXT
+
+/* full 64 by 32 signed divide; the overflow checks might be right by now */
+1: srawi r6,r4,31 # absolute value of r4:r5
+ srawi r0,r3,31 # absolute value of r3
+ xor r5,r5,r6
+ xor r3,r3,r0
+ subfc r5,r6,r5
+ xor r4,r4,r6
+ sub r3,r3,r0
+ subfe r4,r6,r4
+ xor r0,r0,r6 # sign of result
+ cmplw r4,r3 # coarse overflow detection
+ bnl- _divide_error # (probably not necessary)
+ bl _div_64_32
+ xor r5,r5,r0 # apply sign to result
+ sub r5,r5,r0
+ xor. r7,r0,r5 # wrong sign: overflow
+ xor r4,r4,r6 # apply sign to remainder
+ blt- _divide_error
+ stwbrx r5,EAX,state
+ sub r4,r4,r6
+ stwbrx r4,REG
+ NEXT
+
+/* aam is indeed a divide */
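+/* x86 semantics, roughly: AH = AL / imm8 and AL = AL % imm8, with
+   SF/ZF/PF taken from the new AL. */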
+aam: NEXTBYTE(r3)
+ lbz r4,AL(state)
+ cmpwi r3,0
+ beq- _divide_error # zero divide
+ divwu op2,r4,r3 # AL/imm8
+ SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from AL
+ mullw r3,op2,r3 # (AL/imm8)*imm8
+ stb op2,AH(state)
+ sub result,r4,r3 # AL-imm8*(AL/imm8)
+ stb result,AL(state)
+ NEXT # OF/AF/CF undefined
+
+_divide_error: li r3,code_divide_err
+ b complex
+
+/* Instructions dealing with segment registers */
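+/* The rlwinm below extracts sreg*4 directly from the opcode: 06/0e/16/1e
+   (push es/cs/ss/ds) yield 0/4/8/12, and 0f a0/0f a8 (push fs/gs) yield
+   16/20. */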
+pushw_sp_sr: li r3,SP
+ rlwinm opreg,opcode,31,27,29
+ addi r5,state,SELECTORS+2
+ lhbrx r4,state,r3
+ lhzx r0,r5,opreg
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ sthbrx r0,r4,ssb
+ NEXT
+
+pushl_sp_sr: li r3,SP
+ rlwinm opreg,opcode,31,27,29
+ addi r5,state,SELECTORS+2
+ lhbrx r4,state,r3
+ lhzx r0,r5,opreg
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ stwbrx r0,r4,ssb
+ NEXT
+
+movl_sr_mem: cmpwi opreg,20
+ addi opreg,opreg,SELECTORS+2
+ cmpw cr1,base,state # Only registers are sensitive
+ bgt- ud # to word/longword difference
+ lhzx r0,REG
+ bne cr1,1f
+ stwbrx r0,MEM # Actually a register
+ NEXT
+
+movw_sr_mem: cmpwi opreg,20 # SREG 0 to 5 only
+ addi opreg,opreg,SELECTORS+2
+ bgt- ud
+ lhzx r0,REG
+1: sthbrx r0,MEM
+ NEXT
+
+/* Now the instructions that modify the segment registers; note that a
+move/pop to ss disables interrupts and traps for one instruction ! */
+popl_sp_sr: li r6,4
+ b 1f
+popw_sp_sr: li r6,2
+1: li r7,SP
+ rlwinm opreg,opcode,31,27,29
+ lhbrx offset,state,r7
+ addi opreg,opreg,SELBASES
+ lhbrx r4,ssb,offset # new selector
+ add offset,offset,r6
+ bl _segment_load
+ sthbrx offset,state,r7 # update sp
+ cmpwi opreg,8 # is ss ?
+ stwux r3,REG
+ stw r4,SELECTORS-SELBASES(opreg)
+ lwz esb,esbase(state)
+ bne+ nop
+ lwz ssb,ssbase(state) # pop ss
+ crmove RF,TF # prevent traps
+ NEXT
+
+movw_mem_sr: cmpwi opreg,20
+ addi r7,state,SELBASES
+ bgt- ud
+ cmpwi opreg,4 # CS illegal
+ beq- ud
+ lhbrx r4,MEM
+ bl _segment_load
+ stwux r3,r7,opreg
+ cmpwi opreg,8
+ stw r4,SELECTORS-SELBASES(r7)
+ lwz esb,esbase(state)
+ bne+ nop
+ lwz ssb,ssbase(state)
+ crmove RF,TF # prevent traps
+ NEXT
+
+ .equ movl_mem_sr, movw_mem_sr
+
+/* The encoding of les/lss/lds/lfs/lgs is strange, opcode is c4/b2/c5/b4/b5
+for es/ss/ds/fs/gs which are sreg 0/2/3/4/5. And obviously there is
+no lcs instruction, it's called a far jump. */
+
+ldlptrl: lwzux r7,MEM
+ li r4,4
+ bl 1f
+ stwx r7,REG
+ NEXT
+ldlptrw: lhzux r7,MEM
+ li r4,2
+ bl 1f
+ sthx r7,REG
+ NEXT
+
+1: cmpw base,state
+ lis r3,0xc011 # es/ss/ds/fs/gs
+ rlwinm r5,opcode,2,0x0c # 00/08/04/00/04
+ mflr r0
+ addi r3,r3,0x4800 # r3=0xc0114800
+ rlwimi r5,opcode,0,0x10 # 00/18/04/10/14
+ lhbrx r4,r4,offset
+ rlwnm opcode,r3,r5,0x1c # 00/08/0c/10/14 = sreg*4 !
+ beq- ud # Only mem operands allowed !
+ bl _segment_load
+ addi r5,opcode,SELBASES
+ stwux r3,r5,state
+ mtlr r0
+ stw r4,SELECTORS-SELBASES(r5)
+ lwz esb,esbase(state) # keep shadow state in sync
+ lwz ssb,ssbase(state)
+ blr
+
+/* Instructions that may modify the current code segment: the next optimization
+ * might be to avoid calling C code when the code segment does not change. But
+ * it's probably not worth the effort.
+ */
+/* Far calls, jumps and returns */
+lcall_w: NEXTWORD(r4)
+ NEXTWORD(r5)
+ li r3,code_lcallw
+ b complex
+
+lcall_l: NEXTDWORD(r4)
+ NEXTWORD(r5)
+ li r3,code_lcalll
+ b complex
+
+lcallw: lhbrx r4,MEM
+ addi offset,offset,2
+ lhbrx r5,MEM
+ li r3,code_lcallw
+ b complex
+
+lcalll: lwbrx r4,MEM
+ addi offset,offset,4
+ lhbrx r5,MEM
+ li r3,code_lcalll
+ b complex
+
+ljmp_w: NEXTWORD(r4)
+ NEXTWORD(r5)
+ li r3,code_ljmpw
+ b complex
+
+ljmp_l: NEXTDWORD(r4)
+ NEXTWORD(r5)
+ li r3,code_ljmpl
+ b complex
+
+ljmpw: lhbrx r4,MEM
+ addi offset,offset,2
+ lhbrx r5,MEM
+ li r3,code_ljmpw
+ b complex
+
+ljmpl: lwbrx r4,MEM
+ addi offset,offset,4
+ lhbrx r5,MEM
+ li r3,code_ljmpl
+ b complex
+
+lretw_imm: NEXTWORD(r4)
+ b 1f
+lretw: li r4,0
+1: li r3,code_lretw
+ b complex
+
+lretl_imm: NEXTWORD(r4)
+ b 1f
+lretl: li r4,0
+1: li r3,code_lretl
+ b complex
+
+/* Interrupts */
+int: li r3,code_softint # handled by C code
+ NEXTBYTE(r4)
+ b complex
+
+int3: li r3,code_int3 # handled by C code
+ b complex
+
+into: EVAL_OF
+ bf+ OF,nop
+ li r3,code_into
+ b complex # handled by C code
+
+iretw: li r3,code_iretw # handled by C code
+ b complex
+
+iretl: li r3,code_iretl
+ b complex
+
+/* Miscellaneous flag control instructions */
+clc: oris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
+ xoris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16
+ NEXT
+
+cmc: oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
+ xoris flags,flags,(CF_IN_CR|CF_COMPLEMENT|ABOVE_IN_CR)>>16
+ NEXT
+
+stc: oris flags,flags,\
+ (CF_IN_CR|CF_LOCATION|CF_COMPLEMENT|ABOVE_IN_CR)>>16
+ xoris flags,flags,(CF_IN_CR|CF_LOCATION|ABOVE_IN_CR)>>16
+ NEXT
+
+cld: crclr DF
+ NEXT
+
+std: crset DF
+ NEXT
+
+cli: crclr IF
+ NEXT
+
+sti: crset IF
+ NEXT
+
+lahf: bl _eval_flags
+ stb r3,AH(state)
+ NEXT
+
+sahf: andis. r3,flags,OF_EXPLICIT>>16
+ lbz r0,AH(state)
+ beql+ _eval_of # save OF just in case
+ rlwinm op1,r0,31,0x08 # AF
+ rlwinm flags,flags,0,OF_STATE_MASK
+ extsb result,r0 # SF/PF
+ ZF862ZF(r0)
+ oris flags,flags,(ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
+ addi op2,op1,0 # AF
+ ori result,result,0x00fb # set all except PF
+ mtcrf 0x02,r0 # SF/ZF
+ rlwimi flags,r0,27,CF_VALUE # CF
+ xori result,result,0x00ff # 00 if PF set, 04 if clear
+ NEXT
+
+pushfw_sp: bl _eval_flags
+ li r4,SP
+ lhbrx r5,r4,state
+ addi r5,r5,-2
+ sthbrx r5,r4,state
+ clrlwi r5,r5,16
+ sthbrx r3,ssb,r5
+ NEXT
+
+pushfl_sp: bl _eval_flags
+ li r4,SP
+ lhbrx r5,r4,state
+ addi r5,r5,-4
+ sthbrx r5,r4,state
+ clrlwi r5,r5,16
+ stwbrx r3,ssb,r5
+ NEXT
+
+popfl_sp: li r4,SP
+ lhbrx r5,r4,state
+ lwbrx r3,ssb,r5
+ addi r5,r5,4
+ stw r3,eflags(state)
+ sthbrx r5,r4,state
+ b 1f
+
+popfw_sp: li r4,SP
+ lhbrx r5,r4,state
+ lhbrx r3,ssb,r5
+ addi r5,r5,2
+ sth r3,eflags+2(state)
+ sthbrx r5,r4,state
+1: rlwinm op1,r3,31,0x08 # AF
+ xori result,r3,4 # PF
+ ZF862ZF(r3) # cr6
+ lis flags,(OF_EXPLICIT|ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16
+ addi op2,op1,0 # AF
+ rlwinm result,result,0,0x04 # PF
+ rlwimi flags,r3,27,CF_VALUE # CF
+ mtcrf 0x6,r3 # IF,DF,TF,SF,ZF
+ rlwimi result,r3,24,0,0 # SF
+ rlwimi flags,r3,15,OF_VALUE # OF
+ NEXT
+
+/* SETcc is slightly faster for setz/setnz */
+setz: EVAL_ZF
+ bt ZF,1f
+0: cmpwi opreg,0
+ bne- ud
+ stbx opreg,MEM
+ NEXT
+
+setnz: EVAL_ZF
+ bt ZF,0b
+1: cmpwi opreg,0
+ bne- ud
+ stbx one,MEM
+ NEXT
+
+#define SETCC(cond, eval, flag) \
+set##cond: EVAL_##eval; bt flag,1b; b 0b; \
+setn##cond: EVAL_##eval; bt flag,0b; b 1b
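+/* For instance SETCC(c, CF, CF) expands to:
+ *   setc:  EVAL_CF; bt CF,1b; b 0b
+ *   setnc: EVAL_CF; bt CF,0b; b 1b
+ * reusing the store-0/store-1 tails of setz/setnz above (illustrative). */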
+
+ SETCC(c, CF, CF)
+ SETCC(a, ABOVE, ABOVE)
+ SETCC(s, SF, SF)
+ SETCC(g, SIGNED, SGT)
+ SETCC(l, SIGNED, SLT)
+ SETCC(o, OF, OF)
+ SETCC(p, PF, PF)
+
+/* No wait for a 486SX */
+ .equ wait, nop
+
+/* ARPL is not recognized in real mode */
+ .equ arpl, ud
+
+/* clts and in general control and debug registers are not implemented */
+ .equ clts, unimpl
+
+aaa: lhbrx r0,AX,state
+ bl _eval_af
+ rlwinm r3,r3,0,0x10
+ SET_FLAGS(FLAGS_ADD(W))
+ rlwimi r3,r0,0,0x0f
+ li r4,0x106
+ addi r3,r3,-10
+ srwi r3,r3,16 # carry ? 0 : 0xffff
+ andc op1,r4,r3 # carry ? 0x106 : 0
+ add result,r0,op1
+ rlwinm result,result,0,28,23 # clear high half of AL
+ li op2,10 # sets AF indirectly
+ sthbrx result,AX,state # OF/SF/ZF/PF undefined !
+ rlwimi result,op1,8,0x10000 # insert CF
+ NEXT
+
+aas: lhbrx r0,AX,state
+ bl _eval_af
+ rlwinm r3,r3,0,0x10
+ SET_FLAGS(FLAGS_ADD(W))
+ rlwimi r3,r0,0,0x0f # AF:AL&0x0f
+ li r4,0x106
+ addi r3,r3,-10
+ srwi r3,r3,16 # carry ? 0 : 0xffff
+ andc op1,r4,r3 # carry ? 0x106 : 0
+ sub result,r0,op1
+ rlwinm result,result,0,28,23 # clear high half of AL
+ li op2,10 # sets AF indirectly
+ sthbrx result,AX,state # OF/SF/ZF/PF undefined !
+ rlwimi result,op1,8,0x10000 # insert CF
+ NEXT
+
+daa: lbz r0,AL(state)
+ bl _eval_af
+ rlwinm r7,r3,0,0x10
+ bl _eval_cf # r3=CF<<8
+ rlwimi r7,r0,0,0x0f
+ SET_FLAGS(FLAGS_ADD(B))
+ addi r4,r7,-10
+ rlwinm r4,r4,3,0x06 # 6 if AF or >9, 0 otherwise
+ srwi op1,r7,1 # 0..4, no AF, 5..f AF set
+ add r0,r0,r4 # conditional add
+ li op2,11 # sets AF depending on op1
+ or r0,r0,r3
+ subfic r3,r0,159
+ rlwinm r3,r3,7,0x60 # mask value to add
+ add result,r0,r3 # final result for SF/ZF/PF
+ stb result,AL(state)
+ rlwimi result,r3,2,0x100 # set CF if added
+ NEXT
+
+das: lbz r0,AL(state)
+ bl _eval_af
+ rlwinm r7,r3,0,0x10
+ bl _eval_cf
+ rlwimi r7,r0,0,0x0f
+ SET_FLAGS(FLAGS_ADD(B))
+ addi r4,r7,-10
+ rlwinm r4,r4,3,0x06
+ srwi op1,r7,1 # 0..4, no AF, 5..f AF set
+ sub r0,r0,r4 # conditional add
+ li op2,11 # sets AF depending on op1
+ or r4,r0,r3 # insert CF
+ addi r3,r4,-160
+ rlwinm r3,r3,7,0x60 # mask value to add
+ sub result,r4,r3 # final result for SF/ZF/PF
+ stb result,AL(state)
+ rlwimi result,r3,2,0x100 # set CF
+ NEXT
+
+/* 486 specific instructions */
+
+/* For cmpxchg, only the zero flag is important */
+
+cmpxchgb: lbz op1,AL(state)
+ SET_FLAGS(FLAGS_SUB(B)|ZF_IN_CR)
+ lbzx op2,MEM
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ bne cr6,1f
+ lbzx r3,REG # success: swap
+ stbx r3,MEM
+ NEXT
+1: stb op2,AL(state)
+ NEXT
+
+cmpxchgw: lhbrx op1,AX,state
+ SET_FLAGS(FLAGS_SUB(W)|ZF_IN_CR)
+ lhbrx op2,MEM
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ bne cr6,1f
+ lhzx r3,REG # success: swap
+ sthx r3,MEM
+ NEXT
+1: sthbrx op2,AX,state
+ NEXT
+
+cmpxchgl: lwbrx op1,EAX,state
+ SET_FLAGS(FLAGS_SUB(L)|ZF_IN_CR|SIGNED_IN_CR)
+ lwbrx op2,MEM
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ bne cr6,1f
+ lwzx r3,REG # success: swap
+ stwx r3,MEM
+ NEXT
+1: stwbrx op2,EAX,state
+ NEXT
+
+xaddb: lbzx op2,MEM
+ SET_FLAGS(FLAGS_ADD(B))
+ lbzx op1,REG
+ add result,op1,op2
+ stbx result,MEM
+ stbx op2,REG
+ NEXT
+
+xaddw: lhbrx op2,MEM
+ SET_FLAGS(FLAGS_ADD(W))
+ lhbrx op1,REG
+ add result,op1,op2
+ sthbrx result,MEM
+ sthbrx op2,REG
+ NEXT
+
+xaddl: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_ADD(L))
+ lwbrx op1,REG
+ add result,op1,op2
+ stwbrx result,MEM
+ stwbrx op2,REG
+ NEXT
+
+/* All FPU instructions skipped. This is a 486 SX ! */
+esc: li r3,code_dna # DNA interrupt
+ b complex
+
+ .equ hlt, unimpl # Cannot stop
+
+ .equ invd, unimpl
+
+/* Undefined in real address mode */
+ .equ lar, ud
+
+ .equ lgdt, unimpl
+ .equ lidt, unimpl
+ .equ lldt, ud
+ .equ lmsw, unimpl
+
+/* protected mode only */
+ .equ lsl, ud
+ .equ ltr, ud
+
+ .equ movl_cr_reg, unimpl
+ .equ movl_reg_cr, unimpl
+ .equ movl_dr_reg, unimpl
+ .equ movl_reg_dr, unimpl
+
+ .equ sgdt, unimpl
+
+ .equ sidt, unimpl
+ .equ sldt, ud
+ .equ smsw, unimpl
+
+ .equ str, ud
+
+ud: li r3,code_ud
+ li r4,0
+ b complex
+
+unimpl: li r3,code_ud
+ li r4,1
+ b complex
+
+ .equ verr, ud
+ .equ verw, ud
+ .equ wbinvd, unimpl
+
+em86_end:
+ .size em86_enter,em86_end-em86_enter
+#ifdef __BOOT__
+ .data
+#define ENTRY(x,t) .long x+t-_jtables
+#else
+ .section .rodata
+#define ENTRY(x,t) .long x+t
+#endif
+
+#define BOP(x) ENTRY(x,2) /* Byte operation with mod/rm byte */
+#define WLOP(x) ENTRY(x,3) /* 16 or 32 bit operation with mod/rm byte */
+#define EXTOP(x) ENTRY(x,0) /* Opcode with extension in mod/rm byte */
+#define OP(x) ENTRY(x,1) /* Direct one byte opcode/prefix */
+
+/* A few macros for the main table */
+#define gen6(op, wl, axeax) \
+ BOP(op##b##_reg_mem); WLOP(op##wl##_reg_mem); \
+ BOP(op##b##_mem_reg); WLOP(op##wl##_mem_reg); \
+ OP(op##b##_imm_al); OP(op##wl##_imm_##axeax)
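+/* For instance gen6(add, w, ax) emits the six entries for x86 opcodes 00-05:
+   addb_reg_mem, addw_reg_mem, addb_mem_reg, addw_mem_reg, addb_imm_al and
+   addw_imm_ax (illustrative expansion). */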
+
+#define rep7(l,t) \
+ ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); \
+ ENTRY(l,t); ENTRY(l,t); ENTRY(l,t)
+
+#define rep8(l) l ; l; l; l; l; l; l; l;
+
+#define allcond(pfx, sfx, t) \
+ ENTRY(pfx##o##sfx, t); ENTRY(pfx##no##sfx, t); \
+ ENTRY(pfx##c##sfx, t); ENTRY(pfx##nc##sfx, t); \
+ ENTRY(pfx##z##sfx, t); ENTRY(pfx##nz##sfx, t); \
+ ENTRY(pfx##na##sfx, t); ENTRY(pfx##a##sfx, t); \
+ ENTRY(pfx##s##sfx, t); ENTRY(pfx##ns##sfx, t); \
+ ENTRY(pfx##p##sfx, t); ENTRY(pfx##np##sfx, t); \
+ ENTRY(pfx##l##sfx, t); ENTRY(pfx##nl##sfx, t); \
+ ENTRY(pfx##ng##sfx, t); ENTRY(pfx##g##sfx, t)
+
+/* single/double register sign extensions and other oddities */
+#define h2sextw cbw /* Half to Single sign extension */
+#define s2dextw cwd /* Single to Double sign extension */
+#define h2sextl cwde
+#define s2dextl cdq
+#define j_a16_cxz_w jcxz_w
+#define j_a32_cxz_w jecxz_w
+#define j_a16_cxz_l jcxz_l
+#define j_a32_cxz_l jecxz_l
+#define loopa16_w loopw_w
+#define loopa16_l loopw_l
+#define loopa32_w loopl_w
+#define loopa32_l loopl_l
+#define loopnza16_w loopnzw_w
+#define loopnza16_l loopnzw_l
+#define loopnza32_w loopnzl_w
+#define loopnza32_l loopnzl_l
+#define loopza16_w loopzw_w
+#define loopza16_l loopzw_l
+#define loopza32_w loopzl_w
+#define loopza32_l loopzl_l
+/* No FP support */
+
+/* Addressing mode table */
+ .align 5
+# (%bx,%si), (%bx,%di), (%bp,%si), (%bp,%di)
+adtable: .long 0x00004360, 0x00004370, 0x80004560, 0x80004570
+# (%si), (%di), o16, (%bx)
+ .long 0x00004600, 0x00004700, 0x00002000, 0x00004300
+# o8(%bx,%si), o8(%bx,%di), o8(%bp,%si), o8(%bp,%di)
+ .long 0x00004360, 0x00004370, 0x80004560, 0x80004570
+# o8(%si), o8(%di), o8(%bp), o8(%bx)
+ .long 0x00004600, 0x00004700, 0x80004500, 0x00004300
+# o16(%bx,%si), o16(%bx,%di), o16(%bp,%si), o16(%bp,%di)
+ .long 0x00004360, 0x00004370, 0x80004560, 0x80004570
+# o16(%si), o16(%di), o16(%bp), o16(%bx)
+ .long 0x00004600, 0x00004700, 0x80004500, 0x00004300
+# register addressing modes do not use the table
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+#now 32 bit modes
+# (%eax), (%ecx), (%edx), (%ebx)
+ .long 0x00004090, 0x00004190, 0x00004290, 0x00004390
+# sib, o32, (%esi), (%edi)
+ .long 0x00003090, 0x00002090, 0x00004690, 0x00004790
+# o8(%eax), o8(%ecx), o8(%edx), o8(%ebx)
+ .long 0x00004090, 0x00004190, 0x00004290, 0x00004390
+# sib, o8(%ebp), o8(%esi), o8(%edi)
+ .long 0x00003090, 0x80004590, 0x00004690, 0x00004790
+# o32(%eax), o32(%ecx), o32(%edx), o32(%ebx)
+ .long 0x00004090, 0x00004190, 0x00004290, 0x00004390
+# sib, o32(%ebp), o32(%esi), o32(%edi)
+ .long 0x00003090, 0x80004590, 0x00004690, 0x00004790
+# register addressing modes do not use the table
+ .long 0, 0, 0, 0, 0, 0, 0, 0
+
+#define jtable(wl, awl, spesp, axeax, name ) \
+ .align 5; \
+jtab_##name: gen6(add, wl, axeax); \
+ OP(push##wl##_##spesp##_sr); \
+ OP(pop##wl##_##spesp##_sr); \
+ gen6(or, wl, axeax); \
+ OP(push##wl##_##spesp##_sr); \
+ OP(_twobytes); \
+ gen6(adc, wl, axeax); \
+ OP(push##wl##_##spesp##_sr); \
+ OP(pop##wl##_##spesp##_sr); \
+ gen6(sbb, wl, axeax); \
+ OP(push##wl##_##spesp##_sr); \
+ OP(pop##wl##_##spesp##_sr); \
+ gen6(and, wl, axeax); OP(_es); OP(daa); \
+ gen6(sub, wl, axeax); OP(_cs); OP(das); \
+ gen6(xor, wl, axeax); OP(_ss); OP(aaa); \
+ gen6(cmp, wl, axeax); OP(_ds); OP(aas); \
+ rep8(OP(inc##wl##_reg)); \
+ rep8(OP(dec##wl##_reg)); \
+ rep8(OP(push##wl##_##spesp##_reg)); \
+ rep8(OP(pop##wl##_##spesp##_reg)); \
+ OP(pusha##wl##_##spesp); OP(popa##wl##_##spesp); \
+ WLOP(bound##wl); WLOP(arpl); \
+ OP(_fs); OP(_gs); OP(_opsize); OP(_adsize); \
+ OP(push##wl##_##spesp##_imm); WLOP(imul##wl##_imm); \
+ OP(push##wl##_##spesp##_imm8); WLOP(imul##wl##_imm8); \
+ OP(insb_##awl); OP(ins##wl##_##awl); \
+ OP(outsb_##awl); OP(outs##wl##_##awl); \
+ allcond(sj,_##wl,1); \
+ EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm); \
+ EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm8); \
+ BOP(testb_reg_mem); WLOP(test##wl##_reg_mem); \
+ BOP(xchgb_reg_mem); WLOP(xchg##wl##_reg_mem); \
+ BOP(movb_reg_mem); WLOP(mov##wl##_reg_mem); \
+ BOP(movb_mem_reg); WLOP(mov##wl##_mem_reg); \
+ WLOP(mov##wl##_sr_mem); WLOP(lea##wl); \
+ WLOP(mov##wl##_mem_sr); WLOP(pop##wl##_##spesp##_##awl); \
+ OP(nop); rep7(xchg##wl##_##axeax##_reg,1); \
+ OP(h2sext##wl); OP(s2dext##wl); \
+ OP(lcall_##wl); OP(wait); \
+ OP(pushf##wl##_##spesp); OP(popf##wl##_##spesp); \
+ OP(sahf); OP(lahf); \
+ OP(movb_##awl##_al); OP(mov##wl##_##awl##_##axeax); \
+ OP(movb_al_##awl); OP(mov##wl##_##axeax##_##awl); \
+ OP(movsb_##awl); OP(movs##wl##_##awl); \
+ OP(cmpsb_##awl); OP(cmps##wl##_##awl); \
+ OP(testb_imm_al); OP(test##wl##_imm_##axeax); \
+ OP(stosb_##awl); OP(stos##wl##_##awl); \
+ OP(lodsb_##awl); OP(lods##wl##_##awl); \
+ OP(scasb_##awl); OP(scas##wl##_##awl); \
+ rep8(OP(movb_imm_reg)); \
+ rep8(OP(mov##wl##_imm_reg)); \
+ EXTOP(shiftb_imm); EXTOP(shift##wl##_imm); \
+ OP(ret##wl##_##spesp##_imm); OP(ret##wl##_##spesp); \
+ WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
+ BOP(movb_imm_mem); WLOP(mov##wl##_imm_mem); \
+ OP(enter##wl##_##spesp); OP(leave##wl##_##spesp); \
+ OP(lret##wl##_imm); OP(lret##wl); \
+ OP(int3); OP(int); OP(into); OP(iret##wl); \
+ EXTOP(shiftb_1); EXTOP(shift##wl##_1); \
+ EXTOP(shiftb_cl); EXTOP(shift##wl##_cl); \
+ OP(aam); OP(aad); OP(ud); OP(xlatb_##awl); \
+ rep8(OP(esc)); \
+ OP(loopnz##awl##_##wl); OP(loopz##awl##_##wl); \
+ OP(loop##awl##_##wl); OP(j_##awl##_cxz_##wl); \
+ OP(inb_port_al); OP(in##wl##_port_##axeax); \
+ OP(outb_al_port); OP(out##wl##_##axeax##_port); \
+ OP(call##wl##_##spesp); OP(jmp_##wl); \
+ OP(ljmp_##wl); OP(sjmp_##wl); \
+ OP(inb_dx_al); OP(in##wl##_dx_##axeax); \
+ OP(outb_al_dx); OP(out##wl##_##axeax##_dx); \
+ OP(_lock); OP(ud); OP(_repnz); OP(_repz); \
+ OP(hlt); OP(cmc); \
+ EXTOP(grp3b); EXTOP(grp3##wl); \
+ OP(clc); OP(stc); OP(cli); OP(sti); \
+ OP(cld); OP(std); \
+ EXTOP(grp4b); EXTOP(grp5##wl##_##spesp); \
+ /* Here we start the table for twobyte instructions */ \
+ OP(ud); OP(ud); WLOP(lar); WLOP(lsl); \
+ OP(ud); OP(ud); OP(clts); OP(ud); \
+ OP(invd); OP(wbinvd); OP(ud); OP(ud); \
+ OP(ud); OP(ud); OP(ud); OP(ud); \
+ rep8(OP(ud)); \
+ rep8(OP(ud)); \
+ OP(movl_cr_reg); OP(movl_reg_cr); \
+ OP(movl_dr_reg); OP(movl_reg_dr); \
+ OP(ud); OP(ud); OP(ud); OP(ud); \
+ rep8(OP(ud)); \
+ /* .long wrmsr, rdtsc, rdmsr, rdpmc; */\
+ rep8(OP(ud)); \
+ rep8(OP(ud)); \
+ /* allcond(cmov, wl); */ \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ /* MMX Start */ \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ /* MMX End */ \
+ allcond(j,_##wl, 1); \
+ allcond(set,,2); \
+ OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
+ OP(ud) /* cpuid */; WLOP(bt##wl##_reg_mem); \
+ WLOP(shld##wl##_imm); WLOP(shld##wl##_cl); \
+ OP(ud); OP(ud); \
+ OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \
+ OP(ud) /* rsm */; WLOP(bts##wl##_reg_mem); \
+ WLOP(shrd##wl##_imm); WLOP(shrd##wl##_cl); \
+ OP(ud); WLOP(imul##wl##_mem_reg); \
+ BOP(cmpxchgb); WLOP(cmpxchg##wl); \
+ WLOP(ldlptr##wl); WLOP(btr##wl##_reg_mem); \
+ WLOP(ldlptr##wl); WLOP(ldlptr##wl); \
+ WLOP(movzb##wl); WLOP(movzw##wl); \
+ OP(ud); OP(ud); \
+ EXTOP(grp8##wl); WLOP(btc##wl##_reg_mem); \
+ WLOP(bsf##wl); WLOP(bsr##wl); \
+ WLOP(movsb##wl); WLOP(movsw##wl); \
+ BOP(xaddb); WLOP(xadd##wl); \
+ OP(ud); OP(ud); \
+ OP(ud); OP(ud); OP(ud); OP(ud); \
+ rep8(OP(bswap)); \
+ /* MMX Start */ \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ rep8(OP(ud)); rep8(OP(ud)); \
+ /* MMX End */
+ .align 5 /* 8kb of tables, 32 byte aligned */
+_jtables: jtable(w, a16, sp, ax, www) /* data16, addr16 */
+ jtable(l, a16, sp, eax, lww) /* data32, addr16 */
+ jtable(w, a32, sp, ax, wlw) /* data16, addr32 */
+ jtable(l, a32, sp, eax, llw) /* data32, addr32 */
+/* The other possible combinations are only required by protected mode
+code using a big stack segment */
+/* Here are the auxiliary tables for opcode extensions, note that
+all entries get 2 or 3 added. */
+#define grp1table(bwl,t,s8) \
+grp1##bwl##_imm##s8:; \
+ ENTRY(add##bwl##_imm##s8,t); ENTRY(or##bwl##_imm##s8,t); \
+ ENTRY(adc##bwl##_imm##s8,t); ENTRY(sbb##bwl##_imm##s8,t); \
+ ENTRY(and##bwl##_imm##s8,t); ENTRY(sub##bwl##_imm##s8,t); \
+ ENTRY(xor##bwl##_imm##s8,t); ENTRY(cmp##bwl##_imm##s8,t)
+
+ grp1table(b,2,)
+ grp1table(w,3,)
+ grp1table(w,3,8)
+ grp1table(l,3,)
+ grp1table(l,3,8)
+
+#define shifttable(bwl,t,c) \
+shift##bwl##_##c:; \
+ ENTRY(rol##bwl##_##c,t); ENTRY(ror##bwl##_##c,t); \
+ ENTRY(rcl##bwl##_##c,t); ENTRY(rcr##bwl##_##c,t); \
+ ENTRY(shl##bwl##_##c,t); ENTRY(shr##bwl##_##c,t); \
+ OP(ud); ENTRY(sar##bwl##_##c,t)
+
+ shifttable(b,2,1)
+ shifttable(w,3,1)
+ shifttable(l,3,1)
+
+ shifttable(b,2,cl)
+ shifttable(w,3,cl)
+ shifttable(l,3,cl)
+
+ shifttable(b,2,imm)
+ shifttable(w,3,imm)
+ shifttable(l,3,imm)
+
+#define grp3table(bwl,t) \
+grp3##bwl: ENTRY(test##bwl##_imm,t); OP(ud); \
+ ENTRY(not##bwl,t); ENTRY(neg##bwl,t); \
+ ENTRY(mul##bwl,t); ENTRY(imul##bwl,t); \
+ ENTRY(div##bwl,t); ENTRY(idiv##bwl,t)
+
+ grp3table(b,2)
+ grp3table(w,3)
+ grp3table(l,3)
+
+grp4b: BOP(incb); BOP(decb); \
+ OP(ud); OP(ud); \
+ OP(ud); OP(ud); \
+ OP(ud); OP(ud)
+
+#define grp5table(wl,spesp) \
+grp5##wl##_##spesp: \
+ WLOP(inc##wl); WLOP(dec##wl); \
+ WLOP(call##wl##_##spesp##_mem); WLOP(lcall##wl); \
+ WLOP(jmp##wl); WLOP(ljmp##wl); \
+ WLOP(push##wl##_##spesp); OP(ud)
+
+ grp5table(w,sp)
+ grp5table(l,sp)
+
+#define grp8table(wl) \
+grp8##wl: OP(ud); OP(ud); OP(ud); OP(ud); \
+ WLOP(bt##wl##_imm); WLOP(bts##wl##_imm); \
+ WLOP(btr##wl##_imm); WLOP(btc##wl##_imm)
+
+ grp8table(w)
+ grp8table(l)
+#ifdef __BOOT__
+_endjtables: .long 0 /* Points to _jtables after relocation */
+#endif
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/exception.S b/bsps/powerpc/motorola_powerpc/bootloader/exception.S
new file mode 100644
index 0000000000..0442354552
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/exception.S
@@ -0,0 +1,471 @@
+/*
+ * exception.S -- Exception handlers for early boot.
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+/* This is an improved version of the TLB interrupt handling code from
+ * the 603e users manual (603eUM.pdf) downloaded from the WWW. All the
+ * visible bugs have been removed. Note that many have survived in the errata
+ * to the 603 user manual (603UMer.pdf).
+ *
+ * This code also pays particular attention to optimization, takes into
+ * account the differences between 603 and 603e, single/multiple processor
+ * systems and tries to order instructions for dual dispatch in many places.
+ *
+ * The optimization has been performed along two lines:
+ * 1) to minimize the number of instruction cache lines needed for the most
+ * common execution paths (the ones that do not result in an exception).
+ * 2) then to order the code to maximize the number of dual issue and
+ * completion opportunities without increasing the number of cache lines
+ * used in the same cases.
+ *
+ * The last goal of this code is to fit inside the address range
+ * assigned to the interrupt vectors: 192 instructions with fixed
+ * entry points every 64 instructions.
+ *
+ * Some typos have also been corrected and the Power l (lowercase L)
+ * instructions replaced by lwz without comment.
+ *
+ * I have attempted to describe the reasons of the order and of the choice
+ * of the instructions but the comments may be hard to understand without
+ * the processor manual.
+ *
+ * Note that the fact that the TLBs are reloaded by software in theory
+ * allows tremendous flexibility: for example, we could avoid setting the
+ * reference bit of a PTE which could actually not be accessed because
+ * of a protection violation by changing a few lines of code. However,
+ * this would significantly slow down most TLB reload operations, and
+ * this is the reason why we try never to make checks that would be
+ * redundant with the hardware and usually indicate a bug in a program.
+ *
+ * There are some inconsistencies in the documentation concerning the
+ * settings of SRR1 bit 15. All recent documentations say now that it is set
+ * for stores and cleared for loads. Anyway this handler never uses this bit.
+ *
+ * A final remark, the rfi instruction seems to implicitly clear the
+ * MSR<14> (tgpr) bit. The documentation claims that this bit is restored
+ * from SRR1 by rfi, but the corresponding bit in SRR1 is the LRU way bit.
+ * Anyway, the only exception which can occur while TGPR is set is a machine
+ * check which would indicate an unrecoverable problem. Recent documentation
+ * now says in some place that rfi clears MSR<14>.
+ *
+ * TLB software load for 602/603/603e/603ev:
+ * Specific Instructions:
+ * tlbld - write the dtlb with the pte in rpa reg
+ * tlbli - write the itlb with the pte in rpa reg
+ * Specific SPRs:
+ * dmiss - address of dstream miss
+ * imiss - address of istream miss
+ * hash1 - address primary hash PTEG address
+ * hash2 - returns secondary hash PTEG address
+ * iCmp - returns the primary istream compare value
+ * dCmp - returns the primary dstream compare value
+ * rpa - the second word of pte used by tlblx
+ * Other specific resources:
+ * cr0 saved in 4 high order bits of SRR1,
+ * SRR1 bit 14 [WAY] selects TLB set to load from LRU algorithm
+ * gprs r0..r3 shadowed by the setting of MSR bit 14 [TGPR]
+ * other bits in SRR1 (unused by this handler but see earlier comments)
+ *
+ * There are three basic flows corresponding to three vectors:
+ * 0x1000: Instruction TLB miss,
+ * 0x1100: Data TLB miss on load,
+ * 0x1200: Data TLB miss on store or not dirty page
+ */
+
+/* define the following if code does not have to run on basic 603 */
+/* #define USE_KEY_BIT */
+
+/* define the following for safe multiprocessing */
+/* #define MULTIPROCESSING */
+
+/* define the following for mixed endian */
+/* #define CHECK_MIXED_ENDIAN */
+
+/* define the following if entries always have the reference bit set */
+#define ASSUME_REF_SET
+
+/* Some OS kernels may want to keep a single copy of the dirty bit in a per
+ * page table. In this case writable pages are always write-protected as long
+ * as they are clean, and the dirty bit set actually means that the page
+ * is writable.
+ */
+#define DIRTY_MEANS_WRITABLE
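+/* With this option a store to a clean (C=0) page always raises a DSI (label
+ * 5 in the third flow below); the OS is then expected to set the dirty bit
+ * itself, which simultaneously makes the page writable. */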
+
+#include <rtems/asm.h>
+#include <rtems/score/cpu.h>
+#include "bootldr.h"
+
+/*
+ * Instruction TLB miss flow
+ * Entry at 0x1000 with the following:
+ * srr0 -> address of instruction that missed
+ * srr1 -> 0:3=cr0, 13=1 (instruction), 14=lru way, 16:31=saved MSR
+ * msr<tgpr> -> 1
+ * iMiss -> ea that missed
+ * iCmp -> the compare value for the va that missed
+ * hash1 -> pointer to first hash pteg
+ * hash2 -> pointer to second hash pteg
+ *
+ * Register usage:
+ * r0 is limit address during search / scratch after
+ * r1 is pte data / error code for ISI exception when search fails
+ * r2 is pointer to pte
+ * r3 is compare value during search / scratch after
+ */
+/* Binutils or assembler bug ? Declaring the section executable and writable
+ * generates an error message on the @fixup entries.
+ */
+ .section .exception,"aw"
+# .org 0x1000 # instruction TLB miss entry point
+ .globl tlb_handlers
+tlb_handlers:
+ .type tlb_handlers,@function
+#define ISIVec tlb_handlers-0x1000+0x400
+#define DSIVec tlb_handlers-0x1000+0x300
+ mfspr r2,HASH1
+ lwz r1,0(r2) # Start memory access as soon as possible
+ mfspr r3,ICMP # to load the cache.
+0: la r0,48(r2) # Use explicit loop to avoid using ctr
+1: cmpw r1,r3 # In theory the loop is somewhat slower
+ beq- 2f # than documentation example
+ cmpw r0,r2 # but we gain from starting cache load
+ lwzu r1,8(r2) # earlier and using slots between load
+ bne+ 1b # and comparison for other purposes.
+ cmpw r1,r3
+ bne- 4f # Secondary hash check
+2: lwz r1,4(r2) # Found: load second word of PTE
+ mfspr r0,IMISS # get miss address during load delay
+#ifdef ASSUME_REF_SET
+ andi. r3,r1,8 # check for guarded memory
+ bne- 5f
+ mtspr PPC_RPA,r1
+ mfsrr1 r3
+ tlbli r0
+#else
+/* This is basically the original code from the manual. */
+# andi. r3,r1,8 # check for guarded memory
+# bne- 5f
+# andi. r3,r1,0x100 # check R bit ahead to help folding
+/* However there is a better solution: these last three instructions can be
+replaced by the following which should cause less pipeline stalls because
+both tests are combined and there is a single CR rename buffer */
+ extlwi r3,r1,6,23 # Keep only RCWIMG in 6 most significant bits.
+ rlwinm. r3,r3,5,0,27 # Keep only G (in sign) and R and test.
+ blt- 5f # Negative means guarded, zero R not set.
+ mfsrr1 r3 # get saved cr0 bits now to dual issue
+ ori r1,r1,0x100
+ mtspr PPC_RPA,r1
+ tlbli r0
+/* Do not update PTE if R bit already set, this will save one cache line
+writeback at a later time, and avoid even more bus traffic in
+multiprocessing systems, when several processors access the same PTEGs.
+We also hope that the reference bit will be already set. */
+ bne+ 3f
+#ifdef MULTIPROCESSING
+ srwi r1,r1,8 # get byte 7 of pte
+ stb r1,+6(r2) # update page table
+#else
+ sth r1,+6(r2) # update page table
+#endif
+#endif
+3: mtcrf 0x80,r3 # restore CR0
+ rfi # return to executing program
+
+/* The preceding code is 20 to 25 instructions long, which occupies
+3 or 4 cache lines. */
+4: andi. r0,r3,0x0040 # see if we have done second hash
+ lis r1,0x4000 # set up error code in case next branch taken
+ bne- 6f # speculatively issue the following
+ mfspr r2,HASH2 # get the second pointer
+ ori r3,r3,0x0040 # change the compare value
+ lwz r1,0(r2) # load first entry
+ b 0b # and go back to main loop
+/* We are now at 27 to 32 instructions, using 3 or 4 cache lines for all
+cases in which the TLB is successfully loaded. */
+
+/* Guarded memory protection violation: synthesize an ISI exception. */
+5: lis r1,0x1000 # set srr1<3>=1 to flag guard violation
+/* Entry Not Found branches here with r1 correctly set. */
+6: mfsrr1 r3
+ mfmsr r0
+ insrwi r1,r3,16,16 # build srr1 for ISI exception
+ mtsrr1 r1 # set srr1
+/* It seems few people have realized rlwinm can be used to clear a bit or
+a field of contiguous bits in a register by setting mask_begin>mask_end. */
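+/* Here mask_begin=15 > mask_end=13, so the mask wraps around and covers
+bits 15..31 and 0..13: only bit 14 (msr<tgpr>) is cleared. */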
+ rlwinm r0,r0,0,15,13 # clear the msr<tgpr> bit
+ mtcrf 0x80, r3 # restore CR0
+ mtmsr r0 # flip back to the native gprs
+ isync # Required from 602 doc!
+ b ISIVec # go to instruction access exception
+/* Up to now there are 37 to 42 instructions so at least 20 could be
+inserted for complex cases or for statistics recording. */
+
+
+/*
+ Data TLB miss on load flow
+ Entry at 0x1100 with the following:
+ srr0 -> address of instruction that caused the miss
+ srr1 -> 0:3=cr0, 13=0 (data), 14=lru way, 15=0, 16:31=saved MSR
+ msr<tgpr> -> 1
+ dMiss -> ea that missed
+ dCmp -> the compare value for the va that missed
+ hash1 -> pointer to first hash pteg
+ hash2 -> pointer to second hash pteg
+
+ Register usage:
+ r0 is limit address during search / scratch after
+ r1 is pte data / error code for DSI exception when search fails
+ r2 is pointer to pte
+ r3 is compare value during search / scratch after
+*/
+ .org tlb_handlers+0x100
+ mfspr r2,HASH1
+ lwz r1,0(r2) # Start memory access as soon as possible
+ mfspr r3,DCMP # to load the cache.
+0: la r0,48(r2) # Use explicit loop to avoid using ctr
+1: cmpw r1,r3 # In theory the loop is somewhat slower
+ beq- 2f # than documentation example
+ cmpw r0,r2 # but we gain from starting cache load
+ lwzu r1,8(r2) # earlier and using slots between load
+ bne+ 1b # and comparison for other purposes.
+ cmpw r1,r3
+ bne- 4f # Secondary hash check
+2: lwz r1,4(r2) # Found: load second word of PTE
+ mfspr r0,DMISS # get miss address during load delay
+#ifdef ASSUME_REF_SET
+ mtspr PPC_RPA,r1
+ mfsrr1 r3
+ tlbld r0
+#else
+ andi. r3,r1,0x100 # check R bit ahead to help folding
+ mfsrr1 r3 # get saved cr0 bits now to dual issue
+ ori r1,r1,0x100
+ mtspr PPC_RPA,r1
+ tlbld r0
+/* Do not update PTE if R bit already set, this will save one cache line
+writeback at a later time, and avoid even more bus traffic in
+multiprocessing systems, when several processors access the same PTEGs.
+We also hope that the reference bit will be already set. */
+ bne+ 3f
+#ifdef MULTIPROCESSING
+ srwi r1,r1,8 # get byte 7 of pte
+ stb r1,+6(r2) # update page table
+#else
+ sth r1,+6(r2) # update page table
+#endif
+#endif
+3: mtcrf 0x80,r3 # restore CR0
+ rfi # return to executing program
+
+/* The preceding code is 18 to 23 instructions long, which occupies
+3 cache lines. */
+4: andi. r0,r3,0x0040 # see if we have done second hash
+ lis r1,0x4000 # set up error code in case next branch taken
+ bne- 9f # speculatively issue the following
+ mfspr r2,HASH2 # get the second pointer
+ ori r3,r3,0x0040 # change the compare value
+ lwz r1,0(r2) # load first entry asap
+ b 0b # and go back to main loop
+/* We are now at 25 to 30 instructions, using 3 or 4 cache lines for all
+cases in which the TLB is successfully loaded. */
+
+
+/*
+ Data TLB miss on store or not dirty page flow
+ Entry at 0x1200 with the following:
+ srr0 -> address of instruction that caused the miss
+ srr1 -> 0:3=cr0, 13=0 (data), 14=lru way, 15=1, 16:31=saved MSR
+ msr<tgpr> -> 1
+ dMiss -> ea that missed
+ dCmp -> the compare value for the va that missed
+ hash1 -> pointer to first hash pteg
+ hash2 -> pointer to second hash pteg
+
+ Register usage:
+ r0 is limit address during search / scratch after
+ r1 is pte data / error code for DSI exception when search fails
+ r2 is pointer to pte
+ r3 is compare value during search / scratch after
+*/
+ .org tlb_handlers+0x200
+ mfspr r2,HASH1
+ lwz r1,0(r2) # Start memory access as soon as possible
+ mfspr r3,DCMP # to load the cache.
+0: la r0,48(r2) # Use explicit loop to avoid using ctr
+1: cmpw r1,r3 # In theory the loop is somewhat slower
+ beq- 2f # than documentation example
+ cmpw r0,r2 # but we gain from starting cache load
+ lwzu r1,8(r2) # earlier and using slots between load
+ bne+ 1b # and comparison for other purposes.
+ cmpw r1,r3
+ bne- 4f # Secondary hash check
+2: lwz r1,4(r2) # Found: load second word of PTE
+ mfspr r0,DMISS # get miss address during load delay
+/* We could simply set the C bit and then rely on hardware to flag protection
+violations. This raises the problem that a page which actually has not been
+modified may be marked as dirty and violates the OEA model for guaranteed
+bit settings (table 5-8 of 603eUM.pdf). This can have harmful consequences
+on operating system memory management routines, and play havoc with copy on
+write schemes. So the protection check is ABSOLUTELY necessary. */
+ andi. r3,r1,0x80 # check C bit
+ beq- 5f # if (C==0) go to check protection
+3: mfsrr1 r3 # get the saved cr0 bits
+ mtspr PPC_RPA,r1 # set the pte
+ tlbld r0 # load the dtlb
+ mtcrf 0x80,r3 # restore CR0
+ rfi # return to executing program
+/* The preceding code is 20 instructions long, which occupy
+3 cache lines. */
+4: andi. r0,r3,0x0040 # see if we have done second hash
+ lis r1,0x4200 # set up error code in case next branch taken
+ bne- 9f # speculatively issue the following
+ mfspr r2,HASH2 # get the second pointer
+ ori r3,r3,0x0040 # change the compare value
+ lwz r1,0(r2) # load first entry asap
+ b 0b # and go back to main loop
+/* We are now at 27 instructions, using 3 or 4 cache lines for all
+cases in which the TLB C bit is already set. */
+
+#ifdef DIRTY_MEANS_WRITABLE
+5: lis r1,0x0A00 # protection violation on store
+#else
+/*
+ Entry found and C==0: check protection before setting C:
+ Register usage:
+ r0 is dMiss register
+ r1 is PTE entry (to be copied to RPA if success)
+ r2 is pointer to pte
+ r3 is trashed
+
+ For the 603e, the key bit in SRR1 helps to decide whether there is a
+ protection violation. However the way the check is done in the manual is
+ not very efficient. The code shown here works as well for 603 and 603e and
+ is much more efficient for the 603 and comparable to the manual example
+ * for 603e. However, this code has quite a bad structure because it has
+ * been reordered to speed up the most common cases.
+*/
+/* The first of the following two instructions could be replaced by
+andi. r3,r1,3 but it would compete with cmplwi for cr0 resource. */
+5: clrlwi r3,r1,30 # Extract two low order bits
+ cmplwi r3,2 # Test for PP=10
+ bne- 7f # assume fallthrough is more frequent
+6: ori r1,r1,0x180 # set referenced and changed bit
+ sth r1,6(r2) # update page table
+ b 3b # and finish loading TLB
+/* We are now at 33 instructions, using 5 cache lines. */
+7: bgt- 8f # if PP=11 then DSI protection exception
+/* This code only works if key bit is present (602/603e/603ev) */
+#ifdef USE_KEY_BIT
+ mfsrr1 r3 # get the KEY bit and test it
+ andis. r3,r3,0x0008
+ beq 6b # default prediction taken, truly better ?
+#else
+/* This code is for all 602 and 603 family models: */
+ mfsrr1 r3 # Here the trick is to use the MSR PR bit as a
+ mfsrin r0,r0 # shift count for an rlwnm. instruction which
+ extrwi r3,r3,1,17 # extracts and tests the correct key bit from
+ rlwnm. r3,r0,r3,1,1 # the segment register. RISC they said...
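+/* The selected bit is Ks or Kp from the segment register, i.e. effectively
+   key = MSR[PR] ? Kp : Ks (a sketch of the trick, not the manual's wording). */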
+ mfspr r0,DMISS # Restore fault address to r0
+ beq 6b # if 0 load tlb else protection fault
+#endif
+/* We are now at 40 instructions, (37 if using key bit), using 5 cache
+lines in all cases in which the C bit is successfully set */
+8: lis r1,0x0A00 # protection violation on store
+#endif /* DIRTY_MEANS_WRITABLE */
+/* PTE entry not found branches here with the DSISR code in r1 */
+9: mfsrr1 r3
+ mtdsisr r1
+ clrlwi r2,r3,16 # set up srr1 for DSI exception
+ mfmsr r0
+/* I have some doubts about the usefulness of the xori instruction in
+mixed or pure little-endian environment. The address is in the same
+doubleword, hence in the same protection domain and performing an exclusive
+or with 7 is only valid for byte accesses. */
+#ifdef CHECK_MIXED_ENDIAN
+ andi. r1,r2,1 # test LE bit ahead to help folding
+#endif
+ mtsrr1 r2
+ rlwinm r0,r0,0,15,13 # clear the msr<tgpr> bit
+ mfspr r1,DMISS # get miss address
+#ifdef CHECK_MIXED_ENDIAN
+ beq 1f # if little endian then:
+ xori r1,r1,0x07 # de-mung the data address
+1:
+#endif
+ mtdar r1 # put in dar
+ mtcrf 0x80,r3 # restore CR0
+ mtmsr r0 # flip back to the native gprs
+ isync # required from 602 manual
+ b DSIVec # branch to DSI exception
+/* We are now between 50 and 56 instructions. Close to the limit
+but should be sufficient in case bugs are found. */
+/* Altogether the three handlers occupy 128 instructions in the worst
+case; 64 instructions could still be added (non-contiguously). */
+ .org tlb_handlers+0x300
+ .globl _handler_glue
+_handler_glue:
+/* Entry code for exceptions: DSI (0x300), ISI(0x400), alignment(0x600) and
+ * traps(0x700). In theory it is not necessary to save and restore r13 and all
+ * higher numbered registers, but it is done because it made it possible to
+ * call the firmware (PPCBug) for debugging in the very first stages of
+ * writing the bootloader.
+ */
+ stwu r1,-160(r1)
+ stw r0,save_r(0)
+ mflr r0
+ stmw r2,save_r(2)
+ bl 0f
+0: mfctr r4
+ stw r0,save_lr
+ mflr r9 /* Interrupt vector + few instructions */
+ la r10,160(r1)
+ stw r4,save_ctr
+ mfcr r5
+ lwz r8,2f-0b(r9)
+ mfxer r6
+ stw r5,save_cr
+ mtctr r8
+ stw r6,save_xer
+ mfsrr0 r7
+ stw r10,save_r(1)
+ mfsrr1 r8
+ stw r7,save_nip
+ la r4,8(r1)
+ lwz r13,1f-0b(r9)
+ rlwinm r3,r9,24,0x3f /* Interrupt vector >> 8 */
+ stw r8,save_msr
+ bctrl
+
+ lwz r7,save_msr
+ lwz r6,save_nip
+ mtsrr1 r7
+ lwz r5,save_xer
+ mtsrr0 r6
+ lwz r4,save_ctr
+ mtxer r5
+ lwz r3,save_lr
+ mtctr r4
+ lwz r0,save_cr
+ mtlr r3
+ lmw r2,save_r(2)
+ mtcr r0
+ lwz r0,save_r(0)
+ la r1,160(r1)
+ rfi
+1: .long (__bd)@fixup
+2: .long (_handler)@fixup
+ .section .fixup,"aw"
+ .align 2
+ .long 1b, 2b
+ .previous
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/head.S b/bsps/powerpc/motorola_powerpc/bootloader/head.S
new file mode 100644
index 0000000000..974b78a51c
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/head.S
@@ -0,0 +1,466 @@
+/*
+ * head.S -- Bootloader Entry point
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+#include <rtems/asm.h>
+#include <rtems/score/cpu.h>
+#include "bootldr.h"
+#include <bspopts.h>
+
+#define TEST_PPCBUG_CALLS
+#undef TEST_PPCBUG_CALLS
+
+#define FRAME_SIZE 32
+#define LOCK_CACHES (HID0_DLOCK | HID0_ILOCK)
+#define INVL_CACHES (HID0_DCI | HID0_ICFI)
+#define ENBL_CACHES (HID0_DCE | HID0_ICE)
+
+#ifndef qemu
+#define USE_PPCBUG
+#endif
+
+#define PRINT_CHAR(c) \
+ addi r20,r3,0 ; \
+ li r3,c ; \
+ li r10,0x20 ; \
+ sc ; \
+ addi r3,r20,0 ; \
+ li r10,0x26 ; \
+ sc
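+/* r10 selects the PPCBug system call; 0x20 and 0x26 are presumably the
+ * .OUTCHR (print character in r3) and .PCRLF (print CR/LF) calls. */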
+
+#define MONITOR_ENTER \
+ mfmsr r10 ; \
+ ori r10,r10,MSR_IP ; \
+ mtmsr r10 ; \
+ li r10,0x63 ; \
+ sc
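+/* 0x63 is presumably the .RETURN call; MSR_IP is set first so that
+ * exceptions vector back into the firmware. */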
+
+ START_GOT
+ GOT_ENTRY(_GOT2_TABLE_)
+ GOT_ENTRY(_FIXUP_TABLE_)
+ GOT_ENTRY(.bss)
+ GOT_ENTRY(codemove)
+ GOT_ENTRY(0)
+ GOT_ENTRY(__bd)
+ GOT_ENTRY(moved)
+ GOT_ENTRY(_binary_rtems_gz_start)
+ GOT_ENTRY(_binary_initrd_gz_start)
+ GOT_ENTRY(_binary_initrd_gz_end)
+#ifdef TEST_PPCBUG_CALLS
+ GOT_ENTRY(banner_start)
+ GOT_ENTRY(banner_end)
+#endif
+#ifdef USE_PPCBUG
+ GOT_ENTRY(nioc_reset_packet)
+#endif
+ END_GOT
+ .globl start
+ .type start,@function
+
+/* Point the stack into the PreP partition header in the x86 reserved
+ * code area, so that simple C routines can be called.
+ */
+start:
+#if defined(USE_PPCBUG) && defined(DEBUG) && defined(REENTER_MONITOR)
+ MONITOR_ENTER
+#endif
+ bl 1f
+1: mflr r1
+ li r0,0
+ stwu r0,start-1b-0x400+0x1b0-FRAME_SIZE(r1)
+ stmw r26,FRAME_SIZE-24(r1)
+ GET_GOT
+ mfmsr r28 /* Turn off interrupts */
+ ori r0,r28,MSR_EE
+ xori r0,r0,MSR_EE
+ mtmsr r0
+
+/* Enable the caches, from now on cr2.eq set means processor is 601 */
+
+ mfpvr r0
+ mfspr r29,HID0
+ srwi r0,r0,16
+ cmplwi cr2,r0,1
+ beq 2,2f
+
+/*
+ * commented out, 11/7/2002, gregm. This instruction sequence seems to
+ * be pathological on the 603e.
+ *
+
+#ifndef USE_PPCBUG
+ ori r0,r29,ENBL_CACHES|INVL_CACHES|LOCK_CACHES
+ xori r0,r0,INVL_CACHES|LOCK_CACHES
+ sync
+ isync
+ mtspr HID0,r0
+#endif
+*/
+
+2: bl reloc
+
+/* save all the parameters and the original msr/hid0/r31 */
+ lwz bd,GOT(__bd)
+ stw r3,0(bd)
+ stw r4,4(bd)
+ stw r5,8(bd)
+ stw r6,12(bd)
+ stw r7,16(bd)
+ stw r8,20(bd)
+ stw r9,24(bd)
+ stw r10,28(bd)
+ stw r28,o_msr(bd)
+ stw r29,o_hid0(bd)
+ stw r31,o_r31(bd)
+
+#ifdef USE_PPCBUG
+/* Stop the network interface - otherwise, memory can get
+ * corrupted by the IF DMAing data into its old buffers or
+ * by writing descriptors...
+ */
+ lwz r3,GOT(nioc_reset_packet)
+ li r10, 0x1d /* .NETCTRL */
+ sc
+#endif
+
+/* Call the routine to fill boot_data structure from residual data.
+ * And to find where the code has to be moved.
+ */
+ lis r3,__size@sectoff@ha
+ addi r3,r3,__size@sectoff@l
+ bl early_setup
+
+/* Now we need to relocate ourselves, where we are told to. First put a
+ * copy of the codemove routine to some place in memory.
+ * (which may be where the 0x41 partition was loaded, so size is critical).
+ */
+ lwz r4,GOT(codemove)
+ li r5,_size_codemove
+ lwz r3,mover(bd)
+ lwz r6,cache_lsize(bd)
+
+ bl codemove
+
+ mtctr r3 # Where the temporary codemove is.
+ lwz r3,image(bd)
+ lis r5,_edata@sectoff@ha
+ lwz r4,GOT(0) # Our own address
+ addi r5,r5,_edata@sectoff@l
+ lwz r6,cache_lsize(bd)
+ lwz r8,GOT(moved)
+
+ sub r7,r3,r4 # Difference to adjust pointers.
+ add r8,r8,r7
+ add r30,r30,r7
+ add bd,bd,r7
+
+/* Call the copy routine but return to the new area. */
+
+ mtlr r8 # for the return address
+ bctr # returns to the moved instruction
+
+/* Establish the new top stack frame. */
+moved: lwz r1,stack(bd)
+ li r0,0
+ stwu r0,-16(r1)
+
+/* relocate again */
+ bl reloc
+/* Clear all of BSS */
+ lwz r10,GOT(.bss)
+ li r0,__bss_words@sectoff@l
+ subi r10,r10,4
+ cmpwi r0,0
+ mtctr r0
+ li r0,0
+ beq 4f
+3: stwu r0,4(r10)
+ bdnz 3b
+
+/* Final memory initialization. First switch to unmapped mode
+ * in case the FW had set the MMU on, and flush the TLB to avoid
+ * stale entries from interfering. No I/O access is allowed
+ * during this time!
+ */
+4:
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('M')
+#endif
+ bl MMUoff
+
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('B')
+#endif
+ bl flush_tlb
+
+/* Some firmware versions leave stale values in the BATs, so it's time
+ * to invalidate them to avoid interference with our own mappings.
+ * But the 601 valid bit is in the BATL (IBAT only) and others are in
+ * the [ID]BATU. Bloat, bloat... fortunately thrown away later.
+ */
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('T')
+#endif
+ li r3,0
+ beq cr2,5f
+ mtdbatu 0,r3
+ mtdbatu 1,r3
+ mtdbatu 2,r3
+ mtdbatu 3,r3
+5: mtibatu 0,r3
+ mtibatl 0,r3
+ mtibatu 1,r3
+ mtibatl 1,r3
+ mtibatu 2,r3
+ mtibatl 2,r3
+ mtibatu 3,r3
+ mtibatl 3,r3
+ lis r3,__size@sectoff@ha
+ addi r3,r3,__size@sectoff@l
+ sync # We are going to touch SDR1 !
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('i')
+#endif
+ bl mm_init
+
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('M')
+#endif
+ bl MMUon
+
+/* Now we are mapped and can perform I/O if we want */
+#ifdef TEST_PPCBUG_CALLS
+/* Experience seems to show that PPCBug can only be called with the
+ * data cache disabled and with MMU disabled. Bummer.
+ */
+ li r10,0x22 # .OUTLN
+ lwz r3,GOT(banner_start)
+ lwz r4,GOT(banner_end)
+ sc
+#endif
+#if defined(USE_PPCBUG) && defined(DEBUG)
+ PRINT_CHAR('H')
+#endif
+ bl setup_hw
+ lwz r4,GOT(_binary_rtems_gz_start)
+ lis r5,_rtems_gz_size@sectoff@ha
+ lwz r6,GOT(_binary_initrd_gz_start)
+ lis r3,_rtems_size@sectoff@ha
+ lwz r7,GOT(_binary_initrd_gz_end)
+ addi r5,r5,_rtems_gz_size@sectoff@l
+ addi r3,r3,_rtems_size@sectoff@l
+ sub r7,r7,r6
+ bl decompress_kernel
+
+/* Back here we are unmapped and we start the kernel, passing up to eight
+ * parameters just in case, only r3 to r7 used for now. Flush the tlb so
+ * that the loaded image starts in a clean state.
+ */
+ bl flush_tlb
+ lwz r3,0(bd)
+ lwz r4,4(bd)
+ lwz r5,8(bd)
+ lwz r6,12(bd)
+ lwz r7,16(bd)
+ lwz r8,20(bd)
+ lwz r9,24(bd)
+ lwz r10,28(bd)
+
+ lwz r30,0(0)
+ mtctr r30
+/*
+ * Linux code again
+ *
+ lis r30,0xdeadc0de@ha
+ addi r30,r30,0xdeadc0de@l
+ stw r30,0(0)
+ li r30,0
+*/
+ dcbst 0,r30 /* Make sure it's in memory ! */
+
+/* We just flash-invalidate and disable the dcache, unless it's a 601;
+ * critical areas have been flushed and we don't care about the stack
+ * and other scratch areas.
+ */
+ beq cr2,1f
+ mfspr r0,HID0
+ ori r0,r0,HID0_DCI|HID0_DCE
+ sync
+ mtspr HID0,r0
+ xori r0,r0,HID0_DCI|HID0_DCE
+ mtspr HID0,r0
+
+/* Provisional return to FW, works for PPCBug */
+#if 0 && defined(REENTER_MONITOR)
+ MONITOR_ENTER
+#else
+1: bctr
+#endif
+
+/* relocation function, r30 must point to got2+0x8000 */
+reloc:
+/* Adjust got2 pointers; there is no need to check for 0 since this code
+ * already puts a few entries in the table.
+ */
+ li r0,__got2_entries@sectoff@l
+ la r12,GOT(_GOT2_TABLE_)
+ lwz r11,GOT(_GOT2_TABLE_)
+ mtctr r0
+ sub r11,r12,r11
+ addi r12,r12,-4
+1: lwzu r0,4(r12)
+ add r0,r0,r11
+ stw r0,0(r12)
+ bdnz 1b
+
+/* Now adjust the fixups and the pointers to the fixups in case we need
+ * to move ourselves again.
+ */
+2: li r0,__fixup_entries@sectoff@l
+ lwz r12,GOT(_FIXUP_TABLE_)
+ cmpwi r0,0
+ mtctr r0
+ addi r12,r12,-4
+ beqlr
+3: lwzu r10,4(r12)
+ lwzux r0,r10,r11
+ add r0,r0,r11
+ stw r10,0(r12)
+ stw r0,0(r10)
+ bdnz 3b
+ blr
+
+/* Set the MMU on and off: code is always mapped 1:1 and does not need MMU,
+ * but it does not cost so much to map it also and it catches calls through
+ * NULL function pointers.
+ */
+ .globl MMUon
+ .type MMUon,@function
+MMUon: blr
+ nop
+
+/*
+ mfmsr r0
+ ori r0,r0,MSR_IR|MSR_DR|MSR_IP
+ mflr r11
+ xori r0,r0,MSR_IP
+ mtsrr0 r11
+ mtsrr1 r0
+ rfi
+*/
+ .globl MMUoff
+ .type MMUoff,@function
+MMUoff: blr
+ nop
+
+/*
+ mfmsr r0
+ ori r0,r0,MSR_IR|MSR_DR|MSR_IP
+ mflr r11
+ xori r0,r0,MSR_IR|MSR_DR
+ mtsrr0 r11
+ mtsrr1 r0
+ rfi
+*/
+
+/* Due to the PPC architecture (and according to the specifications), a
+ * series of tlbie which goes through a whole 256 MB segment always flushes
+ * the whole TLB. This is obviously overkill and slow, but who cares ?
+ * It takes about 1 ms on a 200 MHz 603e and works even if residual data
+ * get the number of TLB entries wrong.
+ */
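+/* 0x10000000 bytes / 0x1000 bytes per page = 65536 tlbie operations. */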
+flush_tlb:
+ lis r11,0x1000
+1: addic. r11,r11,-0x1000
+ tlbie r11
+ bnl 1b
+/* tlbsync is not implemented on 601, so use sync which seems to be a superset
+ * of tlbsync in all cases, and do not bother with CPU-dependent code
+ */
+ sync
+ blr
+
+ .globl codemove
+codemove:
+ .type codemove,@function
+/* r3 dest, r4 src, r5 length in bytes, r6 cachelinesize */
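+/* C view, sketch only:
+ *   void codemove(void *dest, const void *src, unsigned len, unsigned lsize);
+ * copies like memmove (overlap in either direction is safe), then flushes
+ * the destination with dcbst/icbi so it can be executed; lsize 0 skips the
+ * cache flush but still synchronizes the prefetch queue. */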
+ cmplw cr1,r3,r4
+ addi r0,r5,3
+ srwi. r0,r0,2
+ beq cr1,4f /* In place copy is not necessary */
+ beq 7f /* Protect against 0 count */
+ mtctr r0
+ bge cr1,2f
+
+ la r8,-4(r4)
+ la r7,-4(r3)
+1: lwzu r0,4(r8)
+ stwu r0,4(r7)
+ bdnz 1b
+ b 4f
+
+2: slwi r0,r0,2
+ add r8,r4,r0
+ add r7,r3,r0
+3: lwzu r0,-4(r8)
+ stwu r0,-4(r7)
+ bdnz 3b
+
+/* Now flush the cache: note that we must start from a cache aligned
+ * address. Otherwise we might miss one cache line.
+ */
+4: cmpwi r6,0
+ add r5,r3,r5
+ beq 7f /* Always flush prefetch queue in any case */
+ subi r0,r6,1
+ andc r3,r3,r0
+ mr r4,r3
+5: cmplw r4,r5
+ dcbst 0,r4
+ add r4,r4,r6
+ blt 5b
+ sync /* Wait for all dcbst to complete on bus */
+ mr r4,r3
+6: cmplw r4,r5
+ icbi 0,r4
+ add r4,r4,r6
+ blt 6b
+7: sync /* Wait for all icbi to complete on bus */
+ isync
+ blr
+ .size codemove,.-codemove
+_size_codemove=.-codemove
+
+ .section ".data" # .rodata
+ .align 4
+#ifdef USE_PPCBUG
+/* A control 'packet' for the .NETCTRL PPCBug syscall to
+ * reset a network interface. Let's hope they used the
+ * first one for booting!! (CLUN/DLUN == 0/0)
+ * Must be 4-byte aligned...
+ */
+nioc_reset_packet:
+ .byte 0 /* Controller LUN */
+ .byte 0 /* Device LUN */
+ .word 0 /* status return */
+ .long 5 /* Command (5=RESET) */
+ .long 0 /* Mem. Addr. for real data (unused for reset) */
+ .long 0 /* Number of bytes */
+ .long 0 /* Status/Control Flags (unused for reset) */
+#endif
+#ifdef TEST_PPCBUG_CALLS
+banner_start:
+ .ascii "This message was printed by PPCBug with MMU enabled"
+banner_end:
+#endif
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/lib.c b/bsps/powerpc/motorola_powerpc/bootloader/lib.c
new file mode 100644
index 0000000000..a414c486a9
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/lib.c
@@ -0,0 +1,62 @@
+/* lib.c
+ *
+ * This file contains the implementation of functions that are unresolved
+ * in the bootloader. Unfortunately it must not use any object code
+ * from newlib or RTEMS because they are not compiled with the right
+ * options. You've been warned!
+ */
+
+/*
+ * Copyright (C) 1998, 1999 valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+
+/*
+ * Provide our own prototypes to avoid warnings and the risk of conflicting
+ * inline definitions from the normal header files.
+ */
+void* memset(void *p, int c, unsigned int n);
+void* memcpy(void *dst, const void * src, unsigned int n);
+char* strcat(char * dest, const char * src);
+int strlen(const char* string);
+
+void* memset(void *p, int c, unsigned int n)
+{
+ char *q =p;
+ for(; n>0; --n) *q++=c;
+ return p;
+}
+
+void* memcpy(void *dst, const void * src, unsigned int n)
+{
+ unsigned char *d=dst;
+ const unsigned char *s=src;
+
+ while(n-- > 0) *d++=*s++;
+ return dst;
+}
+
+char* strcat(char * dest, const char * src)
+{
+ char *tmp = dest;
+
+ while (*dest)
+ dest++;
+ while ((*dest++ = *src++) != '\0')
+ ;
+ return tmp;
+}
+
+int strlen(const char* string)
+{
+ register int i = 0;
+
+ while (string[i] != '\0')
+ ++i;
+ return i;
+}
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/misc.c b/bsps/powerpc/motorola_powerpc/bootloader/misc.c
new file mode 100644
index 0000000000..508467209b
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/misc.c
@@ -0,0 +1,545 @@
+/*
+ * misc.c -- Miscellaneous bootloader support routines
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+#include <rtems/system.h>
+#include <sys/types.h>
+#include <string.h>
+#include "bootldr.h"
+#include <libcpu/spr.h>
+#include "zlib.h"
+#include <libcpu/byteorder.h>
+#include <rtems/bspIo.h>
+#include <bsp.h>
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+SPR_RO(PPC_PVR)
+
+struct inode;
+struct wait_queue;
+struct buffer_head;
+typedef struct { int counter; } atomic_t;
+
+typedef struct page {
+ /* these must be first (free area handling) */
+ struct page *next;
+ struct page *prev;
+ struct inode *inode;
+ unsigned long offset;
+ struct page *next_hash;
+ atomic_t count;
+ unsigned long flags; /* atomic flags, some possibly updated asynchronously */
+ struct wait_queue *wait;
+ struct page **pprev_hash;
+ struct buffer_head * buffers;
+} mem_map_t;
+
+extern opaque mm_private, pci_private, v86_private, console_private;
+
+#define CONSOLE_ON_SERIAL "console=ttyS0"
+
+extern struct console_io vacuum_console_functions;
+extern opaque log_console_setup, serial_console_setup, vga_console_setup;
+
+boot_data __bd = {0, 0, 0, 0, 0, 0, 0, 0,
+ 32, 0, 0, 0, 0, 0, 0,
+ &mm_private,
+ NULL,
+ &pci_private,
+ NULL,
+ &v86_private,
+ "root=/dev/hdc1"
+ };
+
+static void exit(void) __attribute__((noreturn));
+
+static void exit(void) {
+ printk("\nOnly way out is to press the reset button!\n");
+ asm volatile("": : :"memory");
+ while(1);
+}
+
+void hang(const char *s, u_long x, ctxt *p) {
+ u_long *r1;
+#ifdef DEBUG
+ print_all_maps("\nMemory mappings at exception time:\n");
+#endif
+ printk("%s %lx NIP: %p LR: %p\n"
+ "Callback trace (stack:return address)\n",
+ s, x, (void *) p->nip, (void *) p->lr);
+ asm volatile("lwz %0,0(1); lwz %0,0(%0); lwz %0,0(%0)": "=b" (r1));
+ while(r1) {
+ printk(" %p:%p\n", r1, (void *) r1[1]);
+ r1 = (u_long *) *r1;
+ }
+ exit();
+}
+
+static void *zalloc(void *x, unsigned items, unsigned size)
+{
+ void *p = salloc(items*size);
+
+ if (!p) {
+ printk("oops... not enough memory for gunzip\n");
+ }
+ return p;
+}
+
+static void zfree(void *x, void *addr, unsigned nb)
+{
+ sfree(addr);
+}
+
+#define HEAD_CRC 2
+#define EXTRA_FIELD 4
+#define ORIG_NAME 8
+#define COMMENT 0x10
+#define RESERVED 0xe0
+
+#define DEFLATED 8
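+
+/* For reference (RFC 1952): a gzip member starts with a fixed 10-byte
+ * header -- ID1 (0x1f), ID2 (0x8b), CM, FLG, MTIME[4], XFL, OS -- which
+ * is why the parsing below starts at i = 10. The FLG bits then announce
+ * optional fields in this order: EXTRA_FIELD (2-byte little-endian
+ * length plus data), ORIG_NAME and COMMENT (NUL-terminated strings) and
+ * HEAD_CRC (2 bytes).
+ */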
+
+void gunzip(void *dst, int dstlen, unsigned char *src, int *lenp)
+{
+ z_stream s;
+ int r, i, flags;
+
+ /* skip header */
+ i = 10;
+ flags = src[3];
+ if (src[2] != DEFLATED || (flags & RESERVED) != 0) {
+ printk("bad gzipped data\n");
+ exit();
+ }
+ if ((flags & EXTRA_FIELD) != 0)
+ i = 12 + src[10] + (src[11] << 8);
+ if ((flags & ORIG_NAME) != 0)
+ while (src[i++] != 0)
+ ;
+ if ((flags & COMMENT) != 0)
+ while (src[i++] != 0)
+ ;
+ if ((flags & HEAD_CRC) != 0)
+ i += 2;
+ if (i >= *lenp) {
+ printk("gunzip: ran out of data in header\n");
+ exit();
+ }
+
+ s.zalloc = zalloc;
+ s.zfree = zfree;
+ r = inflateInit2(&s, -MAX_WBITS);
+ if (r != Z_OK) {
+ printk("inflateInit2 returned %d\n", r);
+ exit();
+ }
+ s.next_in = src + i;
+ s.avail_in = *lenp - i;
+ s.next_out = dst;
+ s.avail_out = dstlen;
+ r = inflate(&s, Z_FINISH);
+ if (r != Z_OK && r != Z_STREAM_END) {
+ printk("inflate returned %d\n", r);
+ exit();
+ }
+ *lenp = s.next_out - (unsigned char *) dst;
+ inflateEnd(&s);
+}
+
+void decompress_kernel(int kernel_size, void * zimage_start, int len,
+ void * initrd_start, int initrd_len ) {
+ u_char *parea;
+ RESIDUAL* rescopy;
+ int zimage_size= len;
+
+ /* That's a mess, we have to copy the residual data twice just in
+ * case it happens to be in the low memory area where the kernel
+ * is going to be unpacked. Later we have to copy it back to
+ * lower addresses because only the lowest part of memory is mapped
+ * during boot.
+ */
+ parea=__palloc(kernel_size, PA_LOW);
+ if(!parea) {
+ printk("Not enough memory to uncompress the kernel.");
+ exit();
+ }
+
+ rescopy=salloc(sizeof(RESIDUAL));
+	/* Let us hope that the residual data is aligned on a word boundary */
+ *rescopy = *bd->residual;
+ bd->residual = (void *)PAGE_ALIGN(kernel_size);
+
+ /* Note that this clears the bss as a side effect, so some code
+	 * with an ugly special case for SMP could be removed from the kernel!
+ */
+ memset(parea, 0, kernel_size);
+ printk("\nUncompressing the kernel...\n");
+
+ gunzip(parea, kernel_size, zimage_start, &zimage_size);
+
+ bd->of_entry = 0;
+ bd->load_address = 0;
+ bd->r6 = (char *)bd->residual+PAGE_ALIGN(sizeof(RESIDUAL));
+ bd->r7 = bd->r6+strlen(bd->cmd_line);
+ if ( initrd_len ) {
+ /* We have to leave some room for the hash table and for the
+ * whole array of struct page. The hash table would be better
+ * located at the end of memory if possible. With some bridges
+ * DMA from the last pages of memory is slower because
+ * prefetching from PCI has to be disabled to avoid accessing
+ * non existing memory. So it is the ideal place to put the
+ * hash table.
+ */
+ unsigned tmp = rescopy->TotalMemory;
+ /* It's equivalent to tmp & (-tmp), but using the negation
+ * operator on unsigned variables looks so ugly.
+ */
+ if ((tmp & (~tmp+1)) != tmp) tmp <<= 1; /* Next power of 2 */
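+		/* Illustrative: tmp & (~tmp+1) isolates the lowest set
+		 * bit, so it equals tmp only for powers of 2. E.g. for
+		 * 96 MB (0x06000000) it yields 0x02000000, and doubling
+		 * gives 0x0C000000, an upper bound for the next power
+		 * of 2 (0x08000000).
+		 */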
+ tmp /= 256; /* Size of hash table */
+ if (tmp> (2<<20)) tmp=2<<20;
+ tmp = tmp*2 + 0x40000; /* Alignment can double size + 256 kB */
+ tmp += (rescopy->TotalMemory / PAGE_SIZE)
+ * sizeof(struct page);
+ bd->load_address = (void *)PAGE_ALIGN((int)bd->r7 + tmp);
+ bd->of_entry = (char *)bd->load_address+initrd_len;
+ }
+#ifdef DEBUG
+ printk("Kernel at 0x%p, size=0x%x\n", NULL, kernel_size);
+ printk("Initrd at 0x%p, size=0x%x\n",bd->load_address, initrd_len);
+ printk("Residual data at 0x%p\n", bd->residual);
+ printk("Command line at 0x%p\n",bd->r6);
+#endif
+ printk("done\nNow booting...\n");
+ MMUoff(); /* We need to access address 0 ! */
+ codemove(0, parea, kernel_size, bd->cache_lsize);
+ codemove(bd->residual, rescopy, sizeof(RESIDUAL), bd->cache_lsize);
+ codemove(bd->r6, bd->cmd_line, sizeof(bd->cmd_line), bd->cache_lsize);
+ /* codemove checks for 0 length */
+ codemove(bd->load_address, initrd_start, initrd_len, bd->cache_lsize);
+}
+
+static int ticks_per_ms=0;
+
+/*
+ * This is based on rtems_bsp_delay from libcpu
+ */
+void
+boot_udelay(uint32_t _microseconds)
+{
+ uint32_t start, ticks, now;
+
+ ticks = _microseconds * ticks_per_ms / 1000;
+ CPU_Get_timebase_low( start );
+ do {
+ CPU_Get_timebase_low( now );
+ } while (now - start < ticks);
+}
+
+void
+setup_hw(void)
+{
+ char *cp, ch;
+ register RESIDUAL * res;
+ /* PPC_DEVICE * nvram; */
+ struct pci_dev *default_vga;
+ int timer, err;
+ u_short default_vga_cmd;
+
+ res=bd->residual;
+ default_vga=NULL;
+ default_vga_cmd = 0;
+
+#define vpd res->VitalProductData
+ if (_read_PPC_PVR()>>16 != 1) {
+ if ( res && vpd.ProcessorBusHz ) {
+ ticks_per_ms = vpd.ProcessorBusHz/
+ (vpd.TimeBaseDivisor ? vpd.TimeBaseDivisor : 4000);
+ } else {
+ ticks_per_ms = 16500; /* assume 66 MHz on bus */
+ }
+ }
+
+ select_console(CONSOLE_LOG);
+
+ /* We check that the keyboard is present and immediately
+ * select the serial console if not.
+ */
+#if defined(BSP_KBD_IOBASE)
+ err = kbdreset();
+ if (err) select_console(CONSOLE_SERIAL);
+#else
+ err = 1;
+ select_console(CONSOLE_SERIAL);
+#endif
+
+ printk("\nModel: %s\nSerial: %s\n"
+ "Processor/Bus frequencies (Hz): %ld/%ld\n"
+ "Time Base Divisor: %ld\n"
+ "Memory Size: %lx\n"
+ "Residual: %lx (length %lu)\n",
+ vpd.PrintableModel,
+ vpd.Serial,
+ vpd.ProcessorHz,
+ vpd.ProcessorBusHz,
+ (vpd.TimeBaseDivisor ? vpd.TimeBaseDivisor : 4000),
+ res->TotalMemory,
+ (unsigned long)res,
+ res->ResidualLength);
+
+ /* This reconfigures all the PCI subsystem */
+ pci_init();
+
+ /* The Motorola NT firmware does not set the correct mem size */
+ if ( vpd.FirmwareSupplier == 0x10000 ) {
+ int memsize;
+ memsize = find_max_mem(bd->pci_devices);
+ if ( memsize != res->TotalMemory ) {
+ printk("Changed Memory size from %lx to %x\n",
+ res->TotalMemory, memsize);
+ res->TotalMemory = memsize;
+ res->GoodMemory = memsize;
+ }
+ }
+#define ENABLE_VGA_USAGE
+#undef ENABLE_VGA_USAGE
+#ifdef ENABLE_VGA_USAGE
+	/* Find the primary VGA device, choosing the first one found
+ * if none is enabled. The basic loop structure has been copied
+ * from linux/drivers/char/bttv.c by Alan Cox.
+ */
+ for (p = bd->pci_devices; p; p = p->next) {
+ u_short cmd;
+ if (p->class != PCI_CLASS_NOT_DEFINED_VGA &&
+ ((p->class) >> 16 != PCI_BASE_CLASS_DISPLAY))
+ continue;
+ if (p->bus->number != 0) {
+ printk("VGA device not on bus 0 not initialized!\n");
+ continue;
+ }
+ /* Only one can be active in text mode, which for now will
+ * be assumed as equivalent to having I/O response enabled.
+ */
+ pci_bootloader_read_config_word(p, PCI_COMMAND, &cmd);
+ if(cmd & PCI_COMMAND_IO || !default_vga) {
+ default_vga=p;
+ default_vga_cmd=cmd;
+ }
+ }
+
+ /* Disable the enabled VGA device, if any. */
+ if (default_vga)
+ pci_bootloader_write_config_word(default_vga, PCI_COMMAND,
+ default_vga_cmd&
+ ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
+ init_v86();
+ /* Same loop copied from bttv.c, this time doing the serious work */
+ for (p = bd->pci_devices; p; p = p->next) {
+ u_short cmd;
+ if (p->class != PCI_CLASS_NOT_DEFINED_VGA &&
+ ((p->class) >> 16 != PCI_BASE_CLASS_DISPLAY))
+ continue;
+ if (p->bus->number != 0) continue;
+ pci_bootloader_read_config_word(p, PCI_COMMAND, &cmd);
+ pci_bootloader_write_config_word(p, PCI_COMMAND,
+ cmd|PCI_COMMAND_IO|PCI_COMMAND_MEMORY);
+ printk("Calling the emulator.\n");
+ em86_main(p);
+ pci_bootloader_write_config_word(p, PCI_COMMAND, cmd);
+ }
+
+ cleanup_v86_mess();
+#endif
+ /* Reenable the primary VGA device */
+ if (default_vga) {
+ pci_bootloader_write_config_word(default_vga, PCI_COMMAND,
+ default_vga_cmd|
+ (PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
+ if (err) {
+ printk("Keyboard error %d, using serial console!\n",
+ err);
+ } else {
+ select_console(CONSOLE_VGA);
+ }
+ } else if (!err) {
+ select_console(CONSOLE_SERIAL);
+ if (bd->cmd_line[0] == '\0') {
+ strcat(&bd->cmd_line[0], CONSOLE_ON_SERIAL);
+ }
+ else {
+ int s = strlen (bd->cmd_line);
+			bd->cmd_line[s] = ' ';
+			bd->cmd_line[s + 1] = '\0';
+ strcat(&bd->cmd_line[0], CONSOLE_ON_SERIAL);
+ }
+ }
+#if 0
+ /* In the future we may use the NVRAM to store default
+ * kernel parameters.
+ */
+ nvram=residual_find_device(~0UL, NULL, SystemPeripheral, NVRAM,
+ ~0UL, 0);
+ if (nvram) {
+ PnP_TAG_PACKET * pkt;
+ switch (nvram->DevId.Interface) {
+ case IndirectNVRAM:
+ pkt=PnP_find_packet(res->DevicePnpHeap
+ +nvram->AllocatedOffset,
+ )
+ }
+ }
+#endif
+
+ printk("\nRTEMS 4.x/PPC load: ");
+ timer = 0;
+ cp = bd->cmd_line+strlen(bd->cmd_line);
+ while (timer++ < 5*1000) {
+ if (debug_tstc()) {
+ while ((ch = debug_getc()) != '\n' && ch != '\r') {
+ if (ch == '\b' || ch == 0177) {
+ if (cp != bd->cmd_line) {
+ cp--;
+ printk("\b \b");
+ }
+ } else {
+ *cp++ = ch;
+ debug_putc(ch);
+ }
+ }
+ break; /* Exit 'timer' loop */
+ }
+ boot_udelay(1000); /* 1 msec */
+ }
+ *cp = 0;
+}
+
+/* Functions to deal with the residual data */
+static int same_DevID(unsigned short vendor,
+ unsigned short Number,
+ unsigned char * str)
+{
+ static unsigned const char hexdigit[]="0123456789ABCDEF";
+ if (strlen((char*)str)!=7) return 0;
+ if ( ( ((vendor>>10)&0x1f)+'A'-1 == str[0]) &&
+ ( ((vendor>>5)&0x1f)+'A'-1 == str[1]) &&
+ ( (vendor&0x1f)+'A'-1 == str[2]) &&
+ (hexdigit[(Number>>12)&0x0f] == str[3]) &&
+ (hexdigit[(Number>>8)&0x0f] == str[4]) &&
+ (hexdigit[(Number>>4)&0x0f] == str[5]) &&
+ (hexdigit[Number&0x0f] == str[6]) ) return 1;
+ return 0;
+}
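+
+/* The DevID string is a compressed EISA/PnP identifier: three 5-bit
+ * letters (with 'A' encoded as 1) packed into the 16-bit vendor field,
+ * followed by four hex digits of the device number. Illustrative check:
+ * for the letters "MOT" the vendor half-word would be
+ * (13<<10)|(15<<5)|20 == 0x35f4.
+ */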
+
+PPC_DEVICE *residual_find_device(unsigned long BusMask,
+ unsigned char * DevID,
+ int BaseType,
+ int SubType,
+ int Interface,
+ int n)
+{
+ int i;
+ RESIDUAL *res = bd->residual;
+ if ( !res || !res->ResidualLength ) return NULL;
+ for (i=0; i<res->ActualNumDevices; i++) {
+#define Dev res->Devices[i].DeviceId
+ if ( (Dev.BusId&BusMask) &&
+ (BaseType==-1 || Dev.BaseType==BaseType) &&
+ (SubType==-1 || Dev.SubType==SubType) &&
+ (Interface==-1 || Dev.Interface==Interface) &&
+ (DevID==NULL || same_DevID((Dev.DevId>>16)&0xffff,
+ Dev.DevId&0xffff, DevID)) &&
+ !(n--) ) return res->Devices+i;
+#undef Dev
+ }
+ return 0;
+}
+
+PnP_TAG_PACKET *PnP_find_packet(unsigned char *p,
+ unsigned packet_tag,
+ int n)
+{
+ unsigned mask, masked_tag, size;
+ if(!p) return 0;
+ if (tag_type(packet_tag)) mask=0xff; else mask=0xF8;
+ masked_tag = packet_tag&mask;
+ for(; *p != END_TAG; p+=size) {
+ if ((*p & mask) == masked_tag && !(n--))
+ return (PnP_TAG_PACKET *) p;
+ if (tag_type(*p))
+ size=ld_le16((unsigned short *)(p+1))+3;
+ else
+ size=tag_small_count(*p)+1;
+ }
+ return 0; /* not found */
+}
+
+PnP_TAG_PACKET *PnP_find_small_vendor_packet(unsigned char *p,
+ unsigned packet_type,
+ int n)
+{
+ int next=0;
+ while (p) {
+ p = (unsigned char *) PnP_find_packet(p, 0x70, next);
+ if (p && p[1]==packet_type && !(n--))
+ return (PnP_TAG_PACKET *) p;
+ next = 1;
+	}
+ return 0; /* not found */
+}
+
+PnP_TAG_PACKET *PnP_find_large_vendor_packet(unsigned char *p,
+ unsigned packet_type,
+ int n)
+{
+ int next=0;
+ while (p) {
+ p = (unsigned char *) PnP_find_packet(p, 0x84, next);
+ if (p && p[3]==packet_type && !(n--))
+ return (PnP_TAG_PACKET *) p;
+ next = 1;
+	}
+ return 0; /* not found */
+}
+
+/* Find out the amount of installed memory. For MPC105 and IBM 660 this
+ * can be done by finding the bank with the highest memory ending address.
+ */
+int
+find_max_mem( struct pci_dev *dev )
+{
+ u_char banks,tmp;
+ int i, top, max;
+
+ max = 0;
+ for ( ; dev; dev = dev->next) {
+ if ( ((dev->vendor == PCI_VENDOR_ID_MOTOROLA) &&
+ (dev->device == PCI_DEVICE_ID_MOTOROLA_MPC105)) ||
+ ((dev->vendor == PCI_VENDOR_ID_IBM) &&
+ (dev->device == 0x0037/*IBM 660 Bridge*/)) ) {
+ pci_bootloader_read_config_byte(dev, 0xa0, &banks);
+ for (i = 0; i < 8; i++) {
+ if ( banks & (1<<i) ) {
+ pci_bootloader_read_config_byte(dev, 0x90+i, &tmp);
+ top = tmp;
+ pci_bootloader_read_config_byte(dev, 0x98+i, &tmp);
+ top |= (tmp&3)<<8;
+ if ( top > max ) max = top;
+ }
+ }
+ if ( max ) return ((max+1)<<20);
+ else return(0);
+ }
+ }
+ return(0);
+}
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/mm.c b/bsps/powerpc/motorola_powerpc/bootloader/mm.c
new file mode 100644
index 0000000000..e954218ab1
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/mm.c
@@ -0,0 +1,996 @@
+/*
+ * mm.c -- Crude memory management for early boot.
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+/* This code is a crude memory manager for early boot for LinuxPPC.
+ * As such, it does not try to perform many optimizations depending
+ * on the processor; it only uses features which are common to
+ * all processors (no BATs...).
+ *
+ * On PreP platforms (the only ones on which it works for now),
+ * it maps 1:1 all RAM/ROM and I/O space as claimed by the
+ * residual data. The holes between these areas can be virtually
+ * remapped to any of these, since for some functions it is very handy
+ * to have virtually contiguous but physically discontiguous memory.
+ *
+ * Physical memory allocation is also very crude, since it's only
+ * designed to manage a small number of large chunks. For valloc/vfree
+ * and palloc/pfree, the unit of allocation is the 4kB page.
+ *
+ * The salloc/sfree has been added after tracing gunzip and seeing
+ * how it performed a very large number of small allocations.
+ * For these the unit of allocation is 8 bytes (the s stands for
+ * small or subpage). This memory is cleared when allocated.
+ *
+ */
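+
+/* Typical uses inside this loader (illustrative only; the two allocation
+ * calls appear in misc.c, the matching frees are hypothetical):
+ *
+ *	RESIDUAL *copy = salloc(sizeof(RESIDUAL));  // small, zeroed
+ *	void *area = __palloc(kernel_size, PA_LOW); // page-granular, low
+ *	sfree(copy); pfree(area);
+ */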
+
+#include <rtems/bspIo.h>
+
+#include <sys/types.h>
+#include <libcpu/spr.h>
+#include "bootldr.h"
+#include <libcpu/mmu.h>
+#include <limits.h>
+
+/* to align the pointer to the (next) page boundary */
+#define PAGE_ALIGN(addr) (((addr)+PAGE_SIZE-1)&PAGE_MASK)
+
+extern void (tlb_handlers)(void);
+extern void (_handler_glue)(void);
+
+/* We use our own kind of simple memory areas for the loader, but
+ * we want to avoid potential clashes with kernel includes.
+ * Here a map maps contiguous areas from base to end,
+ * the firstpte entry corresponds to physical address and has the low
+ * order bits set for caching and permission.
+ */
+
+typedef struct _map {
+ struct _map *next;
+ u_long base;
+ u_long end;
+ u_long firstpte;
+} map;
+
+/* The LSB of the firstpte entries on map lists other than mappings
+ * are constants which can be checked for debugging. All these constants
+ * have the bit of weight 4 set; this bit is zero in the mappings list entries.
+ * Actually firstpte&7 value is:
+ * - 0 or 1 should not happen
+ * - 2 for RW actual virtual->physical mappings
+ * - 3 for RO actual virtual->physical mappings
+ * - 6 for free areas to be suballocated by salloc
+ * - 7 for salloc'ated areas
+ *   - 4 or 5 for all others, in this case firstpte & 63 is
+ * - 4 for unused maps (on the free list)
+ * - 12 for free physical memory
+ * - 13 for physical memory in use
+ * - 20 for free virtual address space
+ * - 21 for allocated virtual address space
+ * - 28 for physical memory space suballocated by salloc
+ * - 29 for physical memory that can't be freed
+ */
+
+#define MAP_FREE_SUBS 6
+#define MAP_USED_SUBS 7
+
+#define MAP_FREE 4
+#define MAP_FREE_PHYS 12
+#define MAP_USED_PHYS 13
+#define MAP_FREE_VIRT 20
+#define MAP_USED_VIRT 21
+#define MAP_SUBS_PHYS 28
+#define MAP_PERM_PHYS 29
+
+SPR_RW(SDR1);
+SPR_RO(DSISR);
+SPR_RO(PPC_DAR);
+
+/* We need a few statically allocated free maps to bootstrap the
+ * memory management */
+static map free_maps[4] = {{free_maps+1, 0, 0, MAP_FREE},
+ {free_maps+2, 0, 0, MAP_FREE},
+ {free_maps+3, 0, 0, MAP_FREE},
+ {NULL, 0, 0, MAP_FREE}};
+struct _mm_private {
+ void *sdr1;
+ u_long hashmask;
+ map *freemaps; /* Pool of unused map structs */
+ map *mappings; /* Sorted list of virtual->physical mappings */
+ map *physavail; /* Unallocated physical address space */
+ map *physused; /* Allocated physical address space */
+ map *physperm; /* Permanently allocated physical space */
+ map *virtavail; /* Unallocated virtual address space */
+ map *virtused; /* Allocated virtual address space */
+ map *sallocfree; /* Free maps for salloc */
+ map *sallocused; /* Used maps for salloc */
+ map *sallocphys; /* Physical areas used by salloc */
+ u_int hashcnt; /* Used to cycle in PTEG when they overflow */
+} mm_private = {hashmask: 0xffc0,
+ freemaps: free_maps+0};
+
+/* A simplified hash table entry declaration */
+typedef struct _hash_entry {
+ int key;
+ u_long rpn;
+} hash_entry;
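+
+/* key mirrors word 0 of the classic 32-bit PowerPC PTE (valid bit, VSID,
+ * hash-function identifier H, API), so key < 0 tests the valid bit; rpn
+ * is word 1 (physical page number plus the R/C/WIMG/PP attribute bits).
+ */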
+
+void print_maps(map *, const char *);
+
+/* The handler used for all exceptions, although for now it is only
+ * designed to properly handle MMU interrupts to fill the hash table.
+ */
+void _handler(int vec, ctxt *p) {
+ map *area;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ u_long vaddr, cause;
+ if (vec==4 || vec==7) { /* ISI exceptions are different */
+ vaddr = p->nip;
+ cause = p->msr;
+ } else { /* Valid for DSI and alignment exceptions */
+ vaddr = _read_PPC_DAR();
+ cause = _read_DSISR();
+ }
+
+ if (vec==3 || vec==4) {
+ /* Panic if the fault is not PTE not found. */
+ if (!(cause & 0x40000000)) {
+ MMUon();
+ printk("\nPanic: vector=%x, cause=%lx\n", vec, cause);
+ hang("Memory protection violation at ", vaddr, p);
+ }
+
+ for(area=mm->mappings; area; area=area->next) {
+ if(area->base<=vaddr && vaddr<=area->end) break;
+ }
+
+ if (area) {
+ u_long hash, vsid, rpn;
+ hash_entry volatile *hte, *_hte1;
+ u_int i, alt=0, flushva;
+
+ vsid = _read_SR((void *)vaddr);
+ rpn = (vaddr&PAGE_MASK)-area->base+area->firstpte;
+ hash = vsid<<6;
+ hash ^= (vaddr>>(PAGE_SHIFT-6))&0x3fffc0;
+ hash &= mm->hashmask;
+ /* Find an empty entry in the PTEG, else
+ * replace a random one.
+ */
+ hte = (hash_entry *) ((u_long)(mm->sdr1)+hash);
+ for (i=0; i<8; i++) {
+ if (hte[i].key>=0) goto found;
+ }
+ hash ^= mm->hashmask;
+ alt = 0x40; _hte1 = hte;
+ hte = (hash_entry *) ((u_long)(mm->sdr1)+hash);
+
+ for (i=0; i<8; i++) {
+ if (hte[i].key>=0) goto found;
+ }
+ alt = 0;
+ hte = _hte1;
+			/* Choose a victim entry and replace it. There might be
+ * better policies to choose the victim, but in a boot
+ * loader we want simplicity as long as it works.
+ *
+ * We would not need to invalidate the TLB entry since
+ * the mapping is still valid. But this would be a mess
+ * when unmapping so we make sure that the TLB is a
+ * subset of the hash table under all circumstances.
+ */
+ i = mm->hashcnt;
+ mm->hashcnt = (mm->hashcnt+1)%8;
+ /* Note that the hash is already complemented here ! */
+ flushva = (~(hash<<9)^((hte[i].key)<<5)) &0x3ff000;
+ if (hte[i].key&0x40) flushva^=0x3ff000;
+ flushva |= ((hte[i].key<<21)&0xf0000000)
+ | ((hte[i].key<<22)&0x0fc00000);
+ hte[i].key=0;
+ asm volatile("sync; tlbie %0; sync" : : "r" (flushva));
+ found:
+ hte[i].rpn = rpn;
+ asm volatile("eieio": : );
+ hte[i].key = 0x80000000|(vsid<<7)|alt|
+ ((vaddr>>22)&0x3f);
+ return;
+ } else {
+ MMUon();
+ printk("\nPanic: vector=%x, cause=%lx\n", vec, cause);
+ hang("\nInvalid memory access attempt at ", vaddr, p);
+ }
+ } else {
+ MMUon();
+ printk(
+ "\nPanic: vector=%d, dsisr=%lx, faultaddr =%lx, "
+ "msr=%lx opcode=%x\n", vec,
+ cause, p->nip, p->msr, * ((unsigned int*) p->nip) );
+ if (vec == 7) {
+ unsigned int* ptr = ((unsigned int*) p->nip) - 4 * 10;
+ for (; ptr <= (((unsigned int*) p->nip) + 4 * 10); ptr ++)
+ printk("Hexdecimal code at address %p = %x\n", ptr, *ptr);
+ }
+ hang("Program or alignment exception at ", vaddr, p);
+ }
+}
+
+/* Generic routines for map handling.
+ */
+
+static inline
+void free_map(map *p) {
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ if (!p) return;
+ p->next=mm->freemaps;
+ mm->freemaps=p;
+ p->firstpte=MAP_FREE;
+}
+
+/* Sorted insertion in linked list */
+static
+int insert_map(map **head, map *p) {
+ map *q = *head;
+ if (!p) return 0;
+ if (q && (q->base < p->base)) {
+ for(;q->next && q->next->base<p->base; q = q->next);
+ if ((q->end >= p->base) ||
+ (q->next && p->end>=q->next->base)) {
+ free_map(p);
+ printk("Overlapping areas!\n");
+ return 1;
+ }
+ p->next = q->next;
+ q->next = p;
+ } else { /* Insert at head */
+ if (q && (p->end >= q->base)) {
+ free_map(p);
+ printk("Overlapping areas!\n");
+ return 1;
+ }
+ p->next = q;
+ *head = p;
+ }
+ return 0;
+}
+
+/* Removal from linked list */
+
+static
+map *remove_map(map **head, map *p) {
+ map *q = *head;
+
+ if (!p || !q) return NULL;
+ if (q==p) {
+ *head = q->next;
+ return p;
+ }
+ for(;q && q->next!=p; q=q->next);
+ if (q) {
+ q->next=p->next;
+ return p;
+ } else {
+ return NULL;
+ }
+}
+
+static
+map *remove_map_at(map **head, void * vaddr) {
+ map *p, *q = *head;
+
+ if (!vaddr || !q) return NULL;
+ if (q->base==(u_long)vaddr) {
+ *head = q->next;
+ return q;
+ }
+ while (q->next && q->next->base != (u_long)vaddr) q=q->next;
+ p=q->next;
+ if (p) q->next=p->next;
+ return p;
+}
+
+static inline
+map * alloc_map_page(void) {
+ map *from, *p;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ /* printk("Allocating new map page !"); */
+ /* Get the highest page */
+ for (from=mm->physavail; from && from->next; from=from->next);
+ if (!from) return NULL;
+
+ from->end -= PAGE_SIZE;
+
+ mm->freemaps = (map *) (from->end+1);
+
+ for(p=mm->freemaps; p<mm->freemaps+PAGE_SIZE/sizeof(map)-1; p++) {
+ p->next = p+1;
+ p->firstpte = MAP_FREE;
+ }
+ (p-1)->next=0;
+
+ /* Take the last one as pointer to self and insert
+ * the map into the permanent map list.
+ */
+
+ p->firstpte = MAP_PERM_PHYS;
+ p->base=(u_long) mm->freemaps;
+ p->end = p->base+PAGE_SIZE-1;
+
+ insert_map(&mm->physperm, p);
+
+ if (from->end+1 == from->base)
+ free_map(remove_map(&mm->physavail, from));
+
+ return mm->freemaps;
+}
+
+static
+map * alloc_map(void) {
+ map *p;
+ struct _mm_private * mm = (struct _mm_private *) bd->mm_private;
+
+ p = mm->freemaps;
+ if (!p) {
+ p=alloc_map_page();
+ }
+
+ if(p) mm->freemaps=p->next;
+
+ return p;
+}
+
+static
+void coalesce_maps(map *p) {
+ while(p) {
+ if (p->next && (p->end+1 == p->next->base)) {
+ map *q=p->next;
+ p->end=q->end;
+ p->next=q->next;
+ free_map(q);
+ } else {
+ p = p->next;
+ }
+ }
+}
+
+/* These routines are used to find the free memory zones to avoid
+ * overlapping destructive copies when initializing.
+ * They work from the top because of the way we want to boot.
+ * In the following the term zone refers to the memory described
+ * by one or several contiguous so called segments in the
+ * residual data.
+ */
+#define STACK_PAGES 2
+static inline u_long
+find_next_zone(RESIDUAL *res, u_long lowpage, u_long flags) {
+ u_long i, newmin=0, size=0;
+ for(i=0; i<res->ActualNumMemSegs; i++) {
+ if (res->Segs[i].Usage & flags
+ && res->Segs[i].BasePage<lowpage
+ && res->Segs[i].BasePage>newmin) {
+ newmin=res->Segs[i].BasePage;
+ size=res->Segs[i].PageCount;
+ }
+ }
+ return newmin+size;
+}
+
+static inline u_long
+find_zone_start(RESIDUAL *res, u_long highpage, u_long flags) {
+ u_long i;
+ int progress;
+ do {
+ progress=0;
+ for (i=0; i<res->ActualNumMemSegs; i++) {
+ if ( (res->Segs[i].BasePage+res->Segs[i].PageCount
+ == highpage)
+ && res->Segs[i].Usage & flags) {
+ highpage=res->Segs[i].BasePage;
+ progress=1;
+ }
+ }
+ } while(progress);
+ return highpage;
+}
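+
+/* Illustrative walk, assuming two contiguous Free segments [0x10, 0x50)
+ * and [0x50, 0x100): find_next_zone(res, ULONG_MAX, Free) returns 0x100
+ * (the top of the highest zone) and find_zone_start(res, 0x100, Free)
+ * chains backwards through both segments and returns 0x10.
+ */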
+
+/* The Motorola NT firmware does not provide any setting in the residual
+ * data about memory segment usage. The following table provides enough
+ * info so that this bootloader can work.
+ */
+MEM_MAP seg_fix[] = {
+ { 0x2000, 0xFFF00, 0x00100 },
+ { 0x0020, 0x02000, 0x7E000 },
+ { 0x0008, 0x00800, 0x00168 },
+ { 0x0004, 0x00000, 0x00005 },
+ { 0x0001, 0x006F1, 0x0010F },
+ { 0x0002, 0x006AD, 0x00044 },
+ { 0x0010, 0x00005, 0x006A8 },
+ { 0x0010, 0x00968, 0x00698 },
+ { 0x0800, 0xC0000, 0x3F000 },
+ { 0x0600, 0xBF800, 0x00800 },
+ { 0x0500, 0x81000, 0x3E800 },
+ { 0x0480, 0x80800, 0x00800 },
+ { 0x0440, 0x80000, 0x00800 } };
+
+/* The Motorola NT firmware does not set up all required info in the residual
+ * data. This routine changes some things in a way that the bootloader and
+ * linux are happy.
+ */
+static void
+fix_residual( RESIDUAL *res )
+{
+#if 0
+ PPC_DEVICE *hostbridge;
+#endif
+ int i;
+
+ /* Missing memory segment information */
+ res->ActualNumMemSegs = sizeof(seg_fix)/sizeof(MEM_MAP);
+ for (i=0; i<res->ActualNumMemSegs; i++) {
+ res->Segs[i].Usage = seg_fix[i].Usage;
+ res->Segs[i].BasePage = seg_fix[i].BasePage;
+ res->Segs[i].PageCount = seg_fix[i].PageCount;
+ }
+ /* The following should be fixed in the current version of the
+ * kernel and of the bootloader.
+ */
+#if 0
+ /* PPCBug has this zero */
+ res->VitalProductData.CacheLineSize = 0;
+ /* Motorola NT firmware sets TimeBaseDivisor to 0 */
+ if ( res->VitalProductData.TimeBaseDivisor == 0 ) {
+ res->VitalProductData.TimeBaseDivisor = 4000;
+ }
+
+ /* Motorola NT firmware records the PCIBridge as a "PCIDEVICE" and
+ * sets "PCIBridgeDirect". This bootloader and linux works better if
+ * BusId = "PROCESSORDEVICE" and Interface = "PCIBridgeIndirect".
+ */
+ hostbridge=residual_find_device(PCIDEVICE, NULL,
+ BridgeController,
+ PCIBridge, -1, 0);
+ if (hostbridge) {
+ hostbridge->DeviceId.BusId = PROCESSORDEVICE;
+ hostbridge->DeviceId.Interface = PCIBridgeIndirect;
+ }
+#endif
+}
+
+/* This routine is the first C code called with very little stack space!
+ * Its goal is to find where the boot image can be moved. This will
+ * be the highest address with enough room.
+ */
+int early_setup(u_long image_size) {
+ register RESIDUAL *res = bd->residual;
+ u_long minpages = PAGE_ALIGN(image_size)>>PAGE_SHIFT;
+
+ if ( residual_fw_is_qemu( res ) ) {
+ /* save command-line - QEMU firmware sets R6/R7 to
+ * commandline start/end (NON-PReP STD)
+ */
+ int len = bd->r7 - bd->r6;
+ if ( len > 0 ) {
+ if ( len > sizeof(bd->cmd_line) - 1 )
+ len = sizeof(bd->cmd_line) - 1;
+ codemove(bd->cmd_line, bd->r6, len, bd->cache_lsize);
+ bd->cmd_line[len] = 0;
+ }
+ }
+
+ /* Fix residual if we are loaded by Motorola NT firmware */
+ if ( res && res->VitalProductData.FirmwareSupplier == 0x10000 )
+ fix_residual( res );
+
+ /* FIXME: if OF we should do something different */
+ if( !bd->of_entry && res &&
+ res->ResidualLength <= sizeof(RESIDUAL) && res->Version == 0 ) {
+ u_long lowpage=ULONG_MAX, highpage;
+ u_long imghigh=0, stkhigh=0;
+ /* Find the highest and large enough contiguous zone
+ consisting of free and BootImage sections. */
+ /* Find 3 free areas of memory, one for the main image, one
+ * for the stack (STACK_PAGES), and page one to put the map
+ * structures. They are allocated from the top of memory.
+ * In most cases the stack will be put just below the image.
+ */
+ while((highpage =
+ find_next_zone(res, lowpage, BootImage|Free))) {
+ lowpage=find_zone_start(res, highpage, BootImage|Free);
+ if ((highpage-lowpage)>minpages &&
+ highpage>imghigh) {
+ imghigh=highpage;
+ highpage -=minpages;
+ }
+ if ((highpage-lowpage)>STACK_PAGES &&
+ highpage>stkhigh) {
+ stkhigh=highpage;
+ highpage-=STACK_PAGES;
+ }
+ }
+
+ bd->image = (void *)((imghigh-minpages)<<PAGE_SHIFT);
+ bd->stack=(void *) (stkhigh<<PAGE_SHIFT);
+
+ /* The code mover is put at the lowest possible place
+ * of free memory. If this corresponds to the loaded boot
+ * partition image it does not matter because it overrides
+ * the unused part of it (x86 code).
+ */
+ bd->mover=(void *) (lowpage<<PAGE_SHIFT);
+
+ /* Let us flush the caches in all cases. After all it should
+ * not harm even on 601 and we don't care about performance.
+ * Right now it's easy since all processors have a line size
+ * of 32 bytes. Once again residual data has proved unreliable.
+ */
+ bd->cache_lsize = 32;
+ }
+	/* For now we always assume that it's successful; we should
+	 * handle the case of insufficient memory better.
+ */
+ return 0;
+}
+
+void * valloc(u_long size) {
+ map *p, *q;
+ struct _mm_private * mm = (struct _mm_private *) bd->mm_private;
+
+ if (size==0) return NULL;
+ size=PAGE_ALIGN(size)-1;
+ for (p=mm->virtavail; p; p=p->next) {
+ if (p->base+size <= p->end) break;
+ }
+ if(!p) return NULL;
+ q=alloc_map();
+ q->base=p->base;
+ q->end=q->base+size;
+ q->firstpte=MAP_USED_VIRT;
+ insert_map(&mm->virtused, q);
+ if (q->end==p->end) free_map(remove_map(&mm->virtavail, p));
+ else p->base += size+1;
+ return (void *)q->base;
+}
+
+static
+void vflush(map *virtmap) {
+ struct _mm_private * mm = (struct _mm_private *) bd->mm_private;
+ u_long i, limit=(mm->hashmask>>3)+8;
+ hash_entry volatile *p=(hash_entry *) mm->sdr1;
+
+	/* PTE handling is simple since the processor never updates
+ * the entries. Writable pages always have the C bit set and
+ * all valid entries have the R bit set. From the processor
+ * point of view the hash table is read only.
+ */
+ for (i=0; i<limit; i++) {
+ if (p[i].key<0) {
+ u_long va;
+ va = ((i<<9)^((p[i].key)<<5)) &0x3ff000;
+ if (p[i].key&0x40) va^=0x3ff000;
+ va |= ((p[i].key<<21)&0xf0000000)
+ | ((p[i].key<<22)&0x0fc00000);
+ if (va>=virtmap->base && va<=virtmap->end) {
+ p[i].key=0;
+ asm volatile("sync; tlbie %0; sync" : :
+ "r" (va));
+ }
+ }
+ }
+}
+
+void vfree(void *vaddr) {
+ map *physmap, *virtmap; /* Actual mappings pertaining to this vm */
+ struct _mm_private * mm = (struct _mm_private *) bd->mm_private;
+
+ /* Flush memory queues */
+ asm volatile("sync": : : "memory");
+
+ virtmap = remove_map_at(&mm->virtused, vaddr);
+ if (!virtmap) return;
+
+ /* Remove mappings corresponding to virtmap */
+ for (physmap=mm->mappings; physmap; ) {
+ map *nextmap=physmap->next;
+ if (physmap->base>=virtmap->base
+ && physmap->base<virtmap->end) {
+ free_map(remove_map(&mm->mappings, physmap));
+ }
+ physmap=nextmap;
+ }
+
+ vflush(virtmap);
+
+ virtmap->firstpte= MAP_FREE_VIRT;
+ insert_map(&mm->virtavail, virtmap);
+ coalesce_maps(mm->virtavail);
+}
+
+void vunmap(void *vaddr) {
+ map *physmap, *virtmap; /* Actual mappings pertaining to this vm */
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ /* Flush memory queues */
+ asm volatile("sync": : : "memory");
+
+ /* vaddr must be within one of the vm areas in use and
+ * then must correspond to one of the physical areas
+ */
+ for (virtmap=mm->virtused; virtmap; virtmap=virtmap->next) {
+ if (virtmap->base<=(u_long)vaddr &&
+ virtmap->end>=(u_long)vaddr) break;
+ }
+ if (!virtmap) return;
+
+ physmap = remove_map_at(&mm->mappings, vaddr);
+ if(!physmap) return;
+ vflush(physmap);
+ free_map(physmap);
+}
+
+int vmap(void *vaddr, u_long p, u_long size) {
+ map *q;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ size=PAGE_ALIGN(size);
+ if(!size) return 1;
+ /* Check that the requested area fits in one vm image */
+ for (q=mm->virtused; q; q=q->next) {
+ if ((q->base <= (u_long)vaddr) &&
+ (q->end>=(u_long)vaddr+size -1)) break;
+ }
+ if (!q) return 1;
+ q= alloc_map();
+ if (!q) return 1;
+ q->base = (u_long)vaddr;
+ q->end = (u_long)vaddr+size-1;
+ q->firstpte = p;
+ return insert_map(&mm->mappings, q);
+}
+
+static
+void create_identity_mappings(int type, int attr) {
+ u_long lowpage=ULONG_MAX, highpage;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ RESIDUAL * res=bd->residual;
+
+ while((highpage = find_next_zone(res, lowpage, type))) {
+ map *p;
+ lowpage=find_zone_start(res, highpage, type);
+ p=alloc_map();
+ /* Do not map page 0 to catch null pointers */
+ lowpage = lowpage ? lowpage : 1;
+ p->base=lowpage<<PAGE_SHIFT;
+ p->end=(highpage<<PAGE_SHIFT)-1;
+ p->firstpte = (lowpage<<PAGE_SHIFT)|attr;
+ insert_map(&mm->mappings, p);
+ }
+}
+
+static inline
+void add_free_map(u_long base, u_long end) {
+ map *q=NULL;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ if (base<end) q=alloc_map();
+ if (!q) return;
+ q->base=base;
+ q->end=end-1;
+ q->firstpte=MAP_FREE_VIRT;
+ insert_map(&mm->virtavail, q);
+}
+
+static inline
+void create_free_vm(void) {
+ map *p;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ u_long vaddr=PAGE_SIZE; /* Never map vaddr 0 */
+ for(p=mm->mappings; p; p=p->next) {
+ add_free_map(vaddr, p->base);
+ vaddr=p->end+1;
+ }
+ /* Special end of memory case */
+ if (vaddr) add_free_map(vaddr,0);
+}
+
+/* Memory management initialization.
+ * Set up the mapping lists.
+ */
+
+static inline
+void add_perm_map(u_long start, u_long size) {
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ map *p=alloc_map();
+ p->base = start;
+ p->end = start + size - 1;
+ p->firstpte = MAP_PERM_PHYS;
+ insert_map(& mm->physperm , p);
+}
+
+void mm_init(u_long image_size)
+{
+ u_long lowpage=ULONG_MAX, highpage;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ RESIDUAL * res=bd->residual;
+ int i;
+ map *p;
+
+ /* The checks are simplified by the fact that the image
+ * and stack area are always allocated at the upper end
+ * of a free block.
+ */
+ while((highpage = find_next_zone(res, lowpage, BootImage|Free))) {
+ lowpage=find_zone_start(res, highpage, BootImage|Free);
+ if ( ( ((u_long)bd->image+PAGE_ALIGN(image_size))>>PAGE_SHIFT)
+ == highpage) {
+ highpage=(u_long)(bd->image)>>PAGE_SHIFT;
+ add_perm_map((u_long)bd->image, image_size);
+ }
+ if ( (( u_long)bd->stack>>PAGE_SHIFT) == highpage) {
+ highpage -= STACK_PAGES;
+ add_perm_map(highpage<<PAGE_SHIFT,
+ STACK_PAGES*PAGE_SIZE);
+ }
+ /* Protect the interrupt handlers that we need ! */
+ if (lowpage<2) lowpage=2;
+ /* Check for the special case of full area! */
+ if (highpage>lowpage) {
+ p = alloc_map();
+ p->base = lowpage<<PAGE_SHIFT;
+ p->end = (highpage<<PAGE_SHIFT)-1;
+ p->firstpte=MAP_FREE_PHYS;
+ insert_map(&mm->physavail, p);
+ }
+ }
+
+ /* Allocate the hash table */
+ mm->sdr1=__palloc(0x10000, PA_PERM|16);
+ _write_SDR1((u_long)mm->sdr1);
+ memset(mm->sdr1, 0, 0x10000);
+ mm->hashmask = 0xffc0;
+
+ /* Setup the segment registers as we want them */
+ for (i=0; i<16; i++) _write_SR(i, (void *)(i<<28));
+	/* Create the maps for the physical memory; FirmwareCode does not
+ * seem to be necessary. ROM is mapped read-only to reduce the risk
+ * of reprogramming it because it's often Flash and some are
+ * amazingly easy to overwrite.
+ */
+ create_identity_mappings(BootImage|Free|FirmwareCode|FirmwareHeap|
+ FirmwareStack, PTE_RAM);
+ create_identity_mappings(SystemROM, PTE_ROM);
+ create_identity_mappings(IOMemory|SystemIO|SystemRegs|
+ PCIAddr|PCIConfig|ISAAddr, PTE_IO);
+
+ create_free_vm();
+
+ /* Install our own MMU and trap handlers. */
+ codemove((void *) 0x300, _handler_glue, 0x100, bd->cache_lsize);
+ codemove((void *) 0x400, _handler_glue, 0x100, bd->cache_lsize);
+ codemove((void *) 0x600, _handler_glue, 0x100, bd->cache_lsize);
+ codemove((void *) 0x700, _handler_glue, 0x100, bd->cache_lsize);
+}
+
+void * salloc(u_long size) {
+ map *p, *q;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ if (size==0) return NULL;
+
+ size = (size+7)&~7;
+
+ for (p=mm->sallocfree; p; p=p->next) {
+ if (p->base+size <= p->end) break;
+ }
+ if(!p) {
+ void *m;
+ m = __palloc(size, PA_SUBALLOC);
+ p = alloc_map();
+		if (!m || !p) {
+			if (p) free_map(p);
+			return NULL;
+		}
+ p->base = (u_long) m;
+ p->firstpte = MAP_FREE_SUBS;
+ p->end = (u_long)m+PAGE_ALIGN(size)-1;
+ insert_map(&mm->sallocfree, p);
+ coalesce_maps(mm->sallocfree);
+ coalesce_maps(mm->sallocphys);
+	}
+ q=alloc_map();
+ q->base=p->base;
+ q->end=q->base+size-1;
+ q->firstpte=MAP_USED_SUBS;
+ insert_map(&mm->sallocused, q);
+ if (q->end==p->end) free_map(remove_map(&mm->sallocfree, p));
+ else p->base += size;
+ memset((void *)q->base, 0, size);
+ return (void *)q->base;
+}
+
+void sfree(void *p) {
+ map *q;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ q=remove_map_at(&mm->sallocused, p);
+ if (!q) return;
+ q->firstpte=MAP_FREE_SUBS;
+ insert_map(&mm->sallocfree, q);
+ coalesce_maps(mm->sallocfree);
+}
+
+/* first/last area fit, flags is a power of 2 indicating the required
+ * alignment. The algorithms are stupid because we expect very little
+ * fragmentation of the areas, if any. The unit of allocation is the page.
+ * The allocation is by default performed from higher addresses down,
+ * unless flags&PA_LOW is true.
+ */
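+
+/* Illustrative: the low bits of flags encode log2 of the alignment (see
+ * the mask computation below), so the call __palloc(0x10000, PA_PERM|16)
+ * used for the hash table in mm_init() requests a permanent 64 kB area
+ * on a 64 kB boundary.
+ */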
+
+void * __palloc(u_long size, int flags)
+{
+ u_long mask = ((1<<(flags&PA_ALIGN_MASK))-1);
+ map *newmap, *frommap, *p, *splitmap=0;
+ map **queue;
+ u_long qflags;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+
+ /* Asking for a size which is not a multiple of the alignment
+ is likely to be an error. */
+
+ if (size & mask) return NULL;
+ size = PAGE_ALIGN(size);
+ if(!size) return NULL;
+
+ if (flags&PA_SUBALLOC) {
+ queue = &mm->sallocphys;
+ qflags = MAP_SUBS_PHYS;
+ } else if (flags&PA_PERM) {
+ queue = &mm->physperm;
+ qflags = MAP_PERM_PHYS;
+ } else {
+ queue = &mm->physused;
+ qflags = MAP_USED_PHYS;
+ }
+ /* We need to allocate that one now so no two allocations may attempt
+ * to take the same memory simultaneously. Alloc_map_page does
+ * not call back here to avoid infinite recursion in alloc_map.
+ */
+
+ if (mask&PAGE_MASK) {
+ splitmap=alloc_map();
+ if (!splitmap) return NULL;
+ }
+
+ for (p=mm->physavail, frommap=NULL; p; p=p->next) {
+ u_long high = p->end;
+ u_long limit = ((p->base+mask)&~mask) + size-1;
+ if (high>=limit && ((p->base+mask)&~mask)+size>p->base) {
+ frommap = p;
+ if (flags&PA_LOW) break;
+ }
+ }
+
+ if (!frommap) {
+ if (splitmap) free_map(splitmap);
+ return NULL;
+ }
+
+	newmap=alloc_map();
+	if (!newmap) {
+		if (splitmap) free_map(splitmap);
+		return NULL;
+	}
+
+ if (flags&PA_LOW) {
+ newmap->base = (frommap->base+mask)&~mask;
+ } else {
+ newmap->base = (frommap->end +1 - size) & ~mask;
+ }
+
+ newmap->end = newmap->base+size-1;
+ newmap->firstpte = qflags;
+
+ /* Add a fragment if we don't allocate until the end. */
+
+ if (splitmap) {
+ splitmap->base=newmap->base+size;
+ splitmap->end=frommap->end;
+ splitmap->firstpte= MAP_FREE_PHYS;
+ frommap->end=newmap->base-1;
+ } else if (flags & PA_LOW) {
+ frommap->base=newmap->base+size;
+ } else {
+ frommap->end=newmap->base-1;
+ }
+
+ /* Remove a fragment if it becomes empty. */
+ if (frommap->base == frommap->end+1) {
+ free_map(remove_map(&mm->physavail, frommap));
+ }
+
+ if (splitmap) {
+ if (splitmap->base == splitmap->end+1) {
+ free_map(remove_map(&mm->physavail, splitmap));
+ } else {
+ insert_map(&mm->physavail, splitmap);
+ }
+ }
+
+ insert_map(queue, newmap);
+ return (void *) newmap->base;
+
+}
+
+void pfree(void * p) {
+ map *q;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ q=remove_map_at(&mm->physused, p);
+ if (!q) return;
+ q->firstpte=MAP_FREE_PHYS;
+ insert_map(&mm->physavail, q);
+ coalesce_maps(mm->physavail);
+}
+
+#ifdef DEBUG
+/* Debugging functions */
+void print_maps(map *chain, const char *s) {
+ map *p;
+ printk("%s",s);
+ for(p=chain; p; p=p->next) {
+ printk(" %08lx-%08lx: %08lx\n",
+ p->base, p->end, p->firstpte);
+ }
+}
+
+void print_all_maps(const char * s) {
+ u_long freemaps;
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ map *free;
+ printk("%s",s);
+ print_maps(mm->mappings, " Currently defined mappings:\n");
+ print_maps(mm->physavail, " Currently available physical areas:\n");
+ print_maps(mm->physused, " Currently used physical areas:\n");
+ print_maps(mm->virtavail, " Currently available virtual areas:\n");
+ print_maps(mm->virtused, " Currently used virtual areas:\n");
+ print_maps(mm->physperm, " Permanently used physical areas:\n");
+ print_maps(mm->sallocphys, " Physical memory used for salloc:\n");
+ print_maps(mm->sallocfree, " Memory available for salloc:\n");
+ print_maps(mm->sallocused, " Memory allocated through salloc:\n");
+ for (freemaps=0, free=mm->freemaps; free; freemaps++, free=free->next);
+ printk(" %ld free maps.\n", freemaps);
+}
+
+void print_hash_table(void) {
+ struct _mm_private *mm = (struct _mm_private *) bd->mm_private;
+ hash_entry *p=(hash_entry *) mm->sdr1;
+ u_int i, valid=0;
+ for (i=0; i<((mm->hashmask)>>3)+8; i++) {
+ if (p[i].key<0) valid++;
+ }
+ printk("%u valid hash entries on pass 1.\n", valid);
+ valid = 0;
+ for (i=0; i<((mm->hashmask)>>3)+8; i++) {
+ if (p[i].key<0) valid++;
+ }
+ printk("%u valid hash entries on pass 2.\n"
+ " vpn:rpn_attr, p/s, pteg.i\n", valid);
+ for (i=0; i<((mm->hashmask)>>3)+8; i++) {
+ if (p[i].key<0) {
+ u_int pteg=(i>>3);
+ u_long vpn;
+ vpn = (pteg^((p[i].key)>>7)) &0x3ff;
+ if (p[i].key&0x40) vpn^=0x3ff;
+ vpn |= ((p[i].key<<9)&0xffff0000)
+ | ((p[i].key<<10)&0xfc00);
+ printk("%08lx:%08lx, %s, %5d.%d\n",
+ vpn, p[i].rpn, p[i].key&0x40 ? "sec" : "pri",
+ pteg, i%8);
+ }
+ }
+}
+
+#endif
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/pci.c b/bsps/powerpc/motorola_powerpc/bootloader/pci.c
new file mode 100644
index 0000000000..c9daca419c
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/pci.c
@@ -0,0 +1,1374 @@
+/*
+ * pci.c -- Crude pci handling for early boot.
+ *
+ * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es
+ *
+ * Modified to compile in RTEMS development environment
+ * by Eric Valette
+ *
+ * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr
+ *
+ * The license and distribution terms for this file may be
+ * found in the file LICENSE in this distribution or at
+ * http://www.rtems.org/license/LICENSE.
+ */
+
+#include <sys/types.h>
+#include <rtems/bspIo.h>
+#include <libcpu/spr.h>
+#include "bootldr.h"
+#include "pci.h"
+#include <libcpu/io.h>
+#include <bsp/consoleIo.h>
+#include <string.h>
+#include <bsp.h>
+
+#include <string.h>
+
+
+/*
+#define DEBUG
+#define PCI_DEBUG
+*/
+
+/* Used to reorganize PCI space on stupid machines which spread resources
+ * across a wide address space. This is bad when P2P bridges are present
+ * or when it limits the mappings that a resource hog like a PCI<->VME
+ * bridge can use.
+ */
+
+typedef struct _pci_resource {
+ struct _pci_resource *next;
+ struct pci_dev *dev;
+ u_long base; /* will be 64 bits on 64 bits machines */
+ u_long size;
+ u_char type; /* 1 is I/O else low order 4 bits of the memory type */
+ u_char reg; /* Register # in conf space header */
+ u_short cmd; /* Original cmd byte */
+} pci_resource;
+
+typedef struct _pci_area {
+ struct _pci_area *next;
+ u_long start;
+ u_long end;
+ struct pci_bus *bus;
+ u_int flags;
+} pci_area;
+
+typedef struct _pci_area_head {
+ pci_area *head;
+ u_long mask;
+ int high; /* To allocate from top */
+} pci_area_head;
+
+#define PCI_AREA_PREFETCHABLE 0
+#define PCI_AREA_MEMORY 1
+#define PCI_AREA_IO 2
+
+struct _pci_private {
+ volatile void * config_addr;
+ volatile u_char * config_data;
+ struct pci_dev **last_dev_p;
+ struct pci_bus pci_root;
+ pci_resource *resources;
+ pci_area_head io, mem;
+
+} pci_private = {
+ config_addr: NULL,
+ config_data: (volatile u_char *) 0x80800000,
+ last_dev_p: NULL,
+ resources: NULL,
+ io: {NULL, 0xfff, 0},
+ mem: {NULL, 0xfffff, 0}
+};
+
+#define pci ((struct _pci_private *)(bd->pci_private))
+#define pci_root pci->pci_root
+
+#if !defined(DEBUG)
+#undef PCI_DEBUG
+/*
+ #else
+ #define PCI_DEBUG
+*/
+#endif
+
+#if defined(PCI_DEBUG)
+static void
+print_pci_resources(const char *s) {
+ pci_resource *p;
+ printk("%s", s);
+ for (p=pci->resources; p; p=p->next) {
+/*
+ printk(" %p:%p %06x %08lx %08lx %d\n",
+ p, p->next,
+ (p->dev->devfn<<8)+(p->dev->bus->number<<16)
+ +0x10+p->reg*4,
+ p->base,
+ p->size,
+ p->type);
+*/
+
+ printk(" %p:%p %d:%02x (%04x:%04x) %08lx %08lx %d\n",
+ p, p->next,
+ p->dev->bus->number, PCI_SLOT(p->dev->devfn),
+ p->dev->vendor, p->dev->device,
+ p->base,
+ p->size,
+ p->type);
+
+ }
+}
+
+static void
+print_pci_area(pci_area *p) {
+ for (; p; p=p->next) {
+ printk(" %p:%p %p %08lx %08lx\n",
+ p, p->next, p->bus, p->start, p->end);
+ }
+}
+
+static void
+print_pci_areas(const char *s) {
+ printk("%s PCI I/O areas:\n",s);
+ print_pci_area(pci->io.head);
+ printk(" PCI memory areas:\n");
+ print_pci_area(pci->mem.head);
+}
+#else
+#define print_pci_areas(x)
+#define print_pci_resources(x)
+#endif
+
+/* Maybe there are some devices that use a size different
+ * from the alignment. For now we assume both are the same.
+ * The blacklist might be used for other weird things in the future too,
+ * since weird, non-PCI-compliant devices seem to proliferate these days.
+ */
+
+struct blacklist_entry {
+ u_short vendor, device;
+ u_char reg;
+ u_long actual_size;
+};
+
+#define BLACKLIST(vid, did, breg, actual_size) \
+ {PCI_VENDOR_ID_##vid, PCI_DEVICE_ID_##vid##_##did, breg, actual_size}
+
+static struct blacklist_entry blacklist[] = {
+ BLACKLIST(S3, TRIO, 0, 0x04000000),
+ {0xffff, 0, 0, 0}
+};
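+
+/* Illustrative: the single entry above makes insert_resource() treat
+ * base register 0 of an S3 Trio as a 64 MB (0x04000000) resource,
+ * whatever size was probed from the BAR itself.
+ */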
+
+/* This function filters resources and then inserts them into a list of
+ * configurable pci resources.
+ */
+
+#define AREA(r) \
+(((r->type&PCI_BASE_ADDRESS_SPACE)==PCI_BASE_ADDRESS_SPACE_IO) ? PCI_AREA_IO :\
+ ((r->type&PCI_BASE_ADDRESS_MEM_PREFETCH) ? PCI_AREA_PREFETCHABLE :\
+ PCI_AREA_MEMORY))
+
+static int insert_before(pci_resource *e, pci_resource *t) {
+ if (e->dev->bus->number != t->dev->bus->number)
+ return e->dev->bus->number > t->dev->bus->number;
+ if (AREA(e) != AREA(t)) return AREA(e)<AREA(t);
+ return (e->size > t->size);
+}
+
+static void insert_resource(pci_resource *r) {
+ struct blacklist_entry *b;
+ pci_resource *p;
+ if (!r) return;
+
+ /* First fixup in case we have a blacklist entry. Note that this
+ * may temporarily leave a resource in an inconsistent state: with
+ * (base & (size-1)) !=0. This is harmless.
+ */
+ for (b=blacklist; b->vendor!=0xffff; b++) {
+ if ((r->dev->vendor==b->vendor) &&
+ (r->dev->device==b->device) &&
+ (r->reg==b->reg)) {
+ r->size=b->actual_size;
+ break;
+ }
+ }
+
+	/* The Motorola NT firmware does not configure PCI devices which are
+	 * not required for booting; other firmwares do. For now:
+	 * - devices already allocated in the ISA range (64 kB I/O, 16 MB
+	 *   memory) with a non-zero base register are left as is.
+	 * - all other registers, whether already allocated or not, are
+	 *   reallocated unless they require an inordinate amount of
+	 *   resources (>256 MB for memory, >64 kB for I/O). Devices
+	 *   with too large mapping requirements are simply ignored
+	 *   and their bases are set to 0. This should disable the
+	 *   corresponding decoders according to the PCI specification.
+	 *   Many devices are buggy in this respect, however, but the
+	 *   limits have hopefully been set high enough to avoid problems.
+ */
+
+ /*
+	** What follows is a little ugly. It seems that at least on the MCP750,
+	** the PBC has some default IO space mappings that the bsp #defines
+	** that read/write to PCI I/O space assume, particularly the i8259
+	** manipulation code. So, if we allow the small IO spaces on PCI bus
+	** 0 and 1 to be remapped, the registers can shift out from under the
+	** #defines. This is particularly awful, but short of redefining the
+	** PCI I/O primitives to be functions with base addresses read from
+	** the hardware, we are stuck with the kludge below. Note that
+	** everything is remapped on the CPCI backplane and any downstream
+	** hardware; it's just the built-in stuff we're tiptoeing around.
+	**
+	** Gregm, 7/16/2003
+	**
+	** Gregm, changed 11/2003 so that only I/O devices on bus 0 are not
+	** remapped. This covers the built-in PC-like I/O devices, but
+	** properly maps I/O devices on higher busses.
+ */
+ if( r->dev->bus->number == 0 )
+ {
+ if ((r->type==PCI_BASE_ADDRESS_SPACE_IO)
+ ? (r->base && r->base <0x10000)
+ : (r->base && r->base <0x1000000)) {
+
+#ifdef PCI_DEBUG
+ printk("freeing region; %p:%p %d:%02x (%04x:%04x) %08lx %08lx %d\n",
+ r, r->next,
+ r->dev->bus->number, PCI_SLOT(r->dev->devfn),
+ r->dev->vendor, r->dev->device,
+ r->base,
+ r->size,
+ r->type);
+#endif
+ sfree(r);
+ return;
+ }
+ }
+
+
+	/* 2004/11/30, PR 729: the fix removes the r->size=0 and r->base=0
+	 * assignment, which made too-large regions conflict with onboard
+	 * hardware, and replaces it with sfree, which deletes the memory
+	 * region from the setup code, leaving it disabled. */
+ if ((r->type==PCI_BASE_ADDRESS_SPACE_IO)
+ ? (r->size > 0x10000)
+ : (r->size > 0x18000000)) {
+ sfree(r);
+ return;
+ }
+
+ /* Now insert into the list sorting by
+ * 1) decreasing bus number
+ * 2) space: prefetchable memory, non-prefetchable and finally I/O
+ * 3) decreasing size
+ */
+ if (!pci->resources || insert_before(r, pci->resources)) {
+ r->next = pci->resources;
+ pci->resources=r;
+ } else {
+ for (p=pci->resources; p->next; p=p->next) {
+ if (insert_before(r, p->next)) break;
+ }
+ r->next=p->next;
+ p->next=r;
+ }
+}
+
+/* This version only works for bus 0. I don't have any P2P bridges to test
+ * a more sophisticated version, which has therefore not been implemented.
+ * Prefetchable memory is not yet handled correctly either, and several
+ * levels of PCI bridges even less so, since their resources must be
+ * allocated together to set up the top bridge correctly.
+ */
+
+static u_long find_range(u_char bus, u_char type,
+ pci_resource **first,
+ pci_resource **past, u_int *flags) {
+ pci_resource *p;
+ u_long total=0;
+ u_int fl=0;
+
+ for (p=pci->resources; p; p=p->next)
+ {
+ if ((p->dev->bus->number == bus) &&
+ AREA(p)==type) break;
+ }
+
+ *first = p;
+
+ for (; p; p=p->next)
+ {
+ if ((p->dev->bus->number != bus) ||
+ AREA(p)!=type || p->size == 0) break;
+ total = total+p->size;
+ fl |= 1<<p->type;
+ }
+
+ *past = p;
+	/* This will be used later to tell whether there are any 32-bit
+	 * devices in an area which could be mapped higher than 4 GB
+	 * on 64-bit architectures.
+ */
+ *flags = fl;
+ return total;
+}
+
+static inline void init_free_area(pci_area_head *h, u_long start,
+ u_long end, u_int mask, int high) {
+ pci_area *p;
+ p = salloc(sizeof(pci_area));
+ if (!p) return;
+ h->head = p;
+ p->next = NULL;
+ p->start = (start+mask)&~mask;
+ p->end = (end-mask)|mask;
+ p->bus = NULL;
+ h->mask = mask;
+ h->high = high;
+}
+
+static void insert_area(pci_area_head *h, pci_area *p) {
+ pci_area *q = h->head;
+ if (!p) return;
+ if (q && (q->start< p->start)) {
+ for(;q->next && q->next->start<p->start; q = q->next);
+ if ((q->end >= p->start) ||
+ (q->next && p->end>=q->next->start)) {
+ sfree(p);
+ printk("Overlapping pci areas!\n");
+ return;
+ }
+ p->next = q->next;
+ q->next = p;
+ } else { /* Insert at head */
+ if (q && (p->end >= q->start)) {
+ sfree(p);
+ printk("Overlapping pci areas!\n");
+ return;
+ }
+ p->next = q;
+ h->head = p;
+ }
+}
+
+static
+void remove_area(pci_area_head *h, pci_area *p)
+{
+ pci_area *q = h->head;
+
+ if (!p || !q) return;
+ if (q==p)
+ {
+ h->head = q->next;
+ return;
+ }
+ for(;q && q->next!=p; q=q->next);
+ if (q) q->next=p->next;
+}
+
+static pci_area * alloc_area(pci_area_head *h, struct pci_bus *bus,
+ u_long required, u_long mask, u_int flags) {
+ pci_area *p;
+ pci_area *from, *split, *new;
+
+ required = (required+h->mask) & ~h->mask;
+ for (p=h->head, from=NULL; p; p=p->next)
+ {
+ u_long l1 = ((p->start+required+mask)&~mask)-1;
+ u_long l2 = ((p->start+mask)&~mask)+required-1;
+ /* Allocated areas point to the bus to which they pertain */
+ if (p->bus) continue;
+ if ((p->end)>=l1 || (p->end)>=l2) from=p;
+ if (from && !h->high) break;
+ }
+ if (!from) return NULL;
+
+ split = salloc(sizeof(pci_area));
+ new = salloc(sizeof(pci_area));
+ /* If allocation of new succeeds then allocation of split has
+	 * also been successful (given the current mm algorithms)!
+ */
+ if (!new) {
+ sfree(split);
+ return NULL;
+ }
+ new->bus = bus;
+ new->flags = flags;
+ /* Now allocate pci_space taking alignment into account ! */
+ if (h->high)
+ {
+ u_long l1 = ((from->end+1)&~mask)-required;
+ u_long l2 = (from->end+1-required)&~mask;
+ new->start = (l1>l2) ? l1 : l2;
+ split->end = from->end;
+ from->end = new->start-1;
+ split->start = new->start+required;
+ new->end = new->start+required-1;
+ }
+ else
+ {
+ u_long l1 = ((from->start+mask)&~mask)+required-1;
+ u_long l2 = ((from->start+required+mask)&~mask)-1;
+ new->end = (l1<l2) ? l1 : l2;
+ split->start = from->start;
+ from->start = new->end+1;
+ new->start = new->end+1-required;
+ split->end = new->start-1;
+ }
+
+ if (from->end+1 == from->start) remove_area(h, from);
+ if (split->end+1 != split->start)
+ {
+ split->bus = NULL;
+ insert_area(h, split);
+ }
+ else
+ {
+ sfree(split);
+ }
+ insert_area(h, new);
+ print_pci_areas("alloc_area called:\n");
+ return new;
+}
+
+static inline
+void alloc_space(pci_area *p, pci_resource *r)
+{
+ if (p->start & (r->size-1)) {
+ r->base = p->end+1-r->size;
+ p->end -= r->size;
+ } else {
+ r->base = p->start;
+ p->start += r->size;
+ }
+}
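+
+/* If the area's current start is not aligned for this resource, the
+ * block is carved from the top end instead; since resources arrive
+ * sorted by decreasing (power of two) size, one of the two ends is
+ * presumably always suitably aligned.
+ */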
+
+static void reconfigure_bus_space(u_char bus, u_char type, pci_area_head *h)
+{
+ pci_resource *first, *past, *r;
+ pci_area *area, tmp;
+ u_int flags;
+ u_int required = find_range(bus, type, &first, &past, &flags);
+
+ if (required==0) return;
+
+ area = alloc_area(h, first->dev->bus, required, first->size-1, flags);
+
+ if (!area) return;
+
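+	/* Work on a local copy of the area: alloc_space() moves the
+	 * start/end cursors as it hands out space, while the area itself
+	 * stays on the list with its bus pointer marking it allocated.
+	 */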
+ tmp = *area;
+ for (r=first; r!=past; r=r->next)
+ {
+ alloc_space(&tmp, r);
+ }
+}
+
+#define BUS0_IO_START 0x10000
+#define BUS0_IO_END 0x1ffff
+#define BUS0_MEM_START 0x1000000
+#define BUS0_MEM_END 0x3f00000
+
+#define BUSREST_IO_START 0x20000
+#define BUSREST_IO_END 0x7ffff
+#define BUSREST_MEM_START 0x4000000
+#define BUSREST_MEM_END 0x10000000
+
+static void reconfigure_pci(void) {
+ pci_resource *r;
+ struct pci_dev *dev;
+
+ u_long bus0_mem_start = BUS0_MEM_START;
+ u_long bus0_mem_end = BUS0_MEM_END;
+
+ if ( residual_fw_is_qemu( bd->residual ) ) {
+ bus0_mem_start += PREP_ISA_MEM_BASE;
+ bus0_mem_end += PREP_ISA_MEM_BASE;
+ }
+
+ /* FIXME: for now memory is relocated from low, it's better
+ * to start from higher addresses.
+ */
+ /*
+ init_free_area(&pci->io, 0x10000, 0x7fffff, 0xfff, 0);
+ init_free_area(&pci->mem, 0x1000000, 0x3cffffff, 0xfffff, 0);
+ */
+
+ init_free_area(&pci->io, BUS0_IO_START, BUS0_IO_END, 0xfff, 0);
+ init_free_area(&pci->mem, bus0_mem_start, bus0_mem_end, 0xfffff, 0);
+
+	/* First reconfigure the I/O space; this will be more
+	 * complex when there is more than one bus. And 64-bit
+	 * devices are another kind of problem.
+	 */
+ reconfigure_bus_space(0, PCI_AREA_IO, &pci->io);
+ reconfigure_bus_space(0, PCI_AREA_MEMORY, &pci->mem);
+ reconfigure_bus_space(0, PCI_AREA_PREFETCHABLE, &pci->mem);
+
+ /* Now we have to touch the configuration space of all
+ * the devices to remap them better than they are right now.
+ * This is done in 3 steps:
+ * 1) first disable I/O and memory response of all devices
+ * 2) modify the base registers
+ * 3) restore the original PCI_COMMAND register.
+ */
+ for (r=pci->resources; r; r= r->next) {
+ if (!r->dev->sysdata) {
+ r->dev->sysdata=r;
+ pci_bootloader_read_config_word(r->dev, PCI_COMMAND, &r->cmd);
+ pci_bootloader_write_config_word(r->dev, PCI_COMMAND,
+ r->cmd & ~(PCI_COMMAND_IO|
+ PCI_COMMAND_MEMORY));
+ }
+ }
+
+ for (r=pci->resources; r; r= r->next) {
+ pci_bootloader_write_config_dword(r->dev,
+ PCI_BASE_ADDRESS_0+(r->reg<<2),
+ r->base);
+
+ if ( residual_fw_is_qemu( bd->residual ) && r->dev->sysdata ) {
+ if ( PCI_BASE_ADDRESS_SPACE_IO == (r->type & PCI_BASE_ADDRESS_SPACE) )
+ ((pci_resource*)r->dev->sysdata)->cmd |= PCI_COMMAND_IO;
+ else
+ ((pci_resource*)r->dev->sysdata)->cmd |= PCI_COMMAND_MEMORY;
+ }
+
+ if ((r->type&
+ (PCI_BASE_ADDRESS_SPACE|
+ PCI_BASE_ADDRESS_MEM_TYPE_MASK)) ==
+ (PCI_BASE_ADDRESS_SPACE_MEMORY|
+ PCI_BASE_ADDRESS_MEM_TYPE_64)) {
+ pci_bootloader_write_config_dword(r->dev,
+ PCI_BASE_ADDRESS_1+(r->reg<<2),
+ 0);
+ }
+ }
+ for (dev=bd->pci_devices; dev; dev= dev->next) {
+ if (dev->sysdata) {
+ pci_bootloader_write_config_word(dev, PCI_COMMAND,
+ ((pci_resource *)dev->sysdata)
+ ->cmd);
+ dev->sysdata=NULL;
+ }
+ }
+}
+
+static int
+indirect_pci_read_config_byte(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint8_t *val) {
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|((offset&~3)<<24));
+ *val=in_8(pci->config_data + (offset&3));
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+indirect_pci_read_config_word(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint16_t *val) {
+ *val = 0xffff;
+ if (offset&1) return PCIBIOS_BAD_REGISTER_NUMBER;
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|((offset&~3)<<24));
+ *val=in_le16((volatile uint16_t *)(pci->config_data + (offset&3)));
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+indirect_pci_read_config_dword(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint32_t *val) {
+ *val = 0xffffffff;
+ if (offset&3) return PCIBIOS_BAD_REGISTER_NUMBER;
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|(offset<<24));
+ *val=in_le32((volatile uint32_t *)pci->config_data);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+indirect_pci_write_config_byte(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint8_t val) {
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|((offset&~3)<<24));
+ out_8(pci->config_data + (offset&3), val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+indirect_pci_write_config_word(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint16_t val) {
+ if (offset&1) return PCIBIOS_BAD_REGISTER_NUMBER;
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|((offset&~3)<<24));
+ out_le16((volatile uint16_t *)(pci->config_data + (offset&3)), val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+indirect_pci_write_config_dword(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint32_t val) {
+ if (offset&3) return PCIBIOS_BAD_REGISTER_NUMBER;
+ out_be32(pci->config_addr,
+ 0x80|(bus<<8)|(dev_fn<<16)|(offset<<24));
+ out_le32((volatile uint32_t *)pci->config_data, val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static const struct pci_bootloader_config_access_functions indirect_functions = {
+ indirect_pci_read_config_byte,
+ indirect_pci_read_config_word,
+ indirect_pci_read_config_dword,
+ indirect_pci_write_config_byte,
+ indirect_pci_write_config_word,
+ indirect_pci_write_config_dword
+};
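+
+/* The indirect accessors above drive a CF8/CFC-style bridge. The
+ * address is written with out_be32() but is built byte-swapped, so
+ * the byte pattern reaching the little-endian host bridge is the
+ * standard CONFIG_ADDRESS word:
+ *
+ *   0x80000000 | (bus << 16) | (dev_fn << 8) | (offset & ~3)
+ *
+ * For example bus 0, dev_fn 0x68, offset 0x10 is stored as the
+ * big-endian word 0x10680080, which a little-endian reader sees
+ * as 0x80006810.
+ */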
+
+static int
+direct_pci_read_config_byte(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint8_t *val) {
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ *val=0xff;
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ *val=in_8(pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+direct_pci_read_config_word(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint16_t *val) {
+ *val = 0xffff;
+ if (offset&1) return PCIBIOS_BAD_REGISTER_NUMBER;
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ *val=in_le16((volatile uint16_t *)
+ (pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset));
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+direct_pci_read_config_dword(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint32_t *val) {
+ *val = 0xffffffff;
+ if (offset&3) return PCIBIOS_BAD_REGISTER_NUMBER;
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ *val=in_le32((volatile uint32_t *)
+ (pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset));
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+direct_pci_write_config_byte(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint8_t val) {
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ out_8(pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset,
+ val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+direct_pci_write_config_word(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint16_t val) {
+ if (offset&1) return PCIBIOS_BAD_REGISTER_NUMBER;
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ out_le16((volatile uint16_t *)
+ (pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset),
+ val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static int
+direct_pci_write_config_dword(unsigned char bus, unsigned char dev_fn,
+ unsigned char offset, uint32_t val) {
+ if (offset&3) return PCIBIOS_BAD_REGISTER_NUMBER;
+ if (bus != 0 || (1<<PCI_SLOT(dev_fn) & 0xff8007fe)) {
+ return PCIBIOS_DEVICE_NOT_FOUND;
+ }
+ out_le32((volatile uint32_t *)
+ (pci->config_data + ((1<<PCI_SLOT(dev_fn))&~1)
+ + (PCI_FUNC(dev_fn)<<8) + offset),
+ val);
+ return PCIBIOS_SUCCESSFUL;
+}
+
+static const struct pci_bootloader_config_access_functions direct_functions = {
+ direct_pci_read_config_byte,
+ direct_pci_read_config_word,
+ direct_pci_read_config_dword,
+ direct_pci_write_config_byte,
+ direct_pci_write_config_word,
+ direct_pci_write_config_dword
+};
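+
+/* The direct accessors above assume an MPC105/106-style contiguous
+ * configuration map: each slot's IDSEL is wired to one address line,
+ * so slot N is reached at config_data + (1 << N), with slot 0 (the
+ * host bridge itself) at offset 0 thanks to the "& ~1". The mask
+ * 0xff8007fe rejects dev_fn values whose slot bit cannot be
+ * generated this way; only slots 0 and 11..22 are reachable.
+ */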
+
+static void pci_read_bases(struct pci_dev *dev, unsigned int howmany)
+{
+ unsigned int reg, nextreg;
+
+#define REG (PCI_BASE_ADDRESS_0 + (reg<<2))
+
+ u_short cmd;
+ uint32_t l, ml;
+ pci_bootloader_read_config_word(dev, PCI_COMMAND, &cmd);
+
+ for(reg=0; reg<howmany; reg=nextreg)
+ {
+ pci_resource *r;
+
+ nextreg=reg+1;
+ pci_bootloader_read_config_dword(dev, REG, &l);
+#if 0
+ if (l == 0xffffffff /*AJF || !l*/) continue;
+#endif
+	  /* Note that disabling the memory response of a host bridge
+	   * would lose data if a DMA transfer were in progress. In a
+	   * bootloader we don't care, however. Also we can't print any
+	   * message for a while since we may just have disabled the
+	   * console.
+	   */
+ pci_bootloader_write_config_word(dev, PCI_COMMAND, cmd &
+ ~(PCI_COMMAND_IO|PCI_COMMAND_MEMORY));
+ pci_bootloader_write_config_dword(dev, REG, ~0);
+ pci_bootloader_read_config_dword(dev, REG, &ml);
+ pci_bootloader_write_config_dword(dev, REG, l);
+
+ /* Reenable the device now that we've played with
+ * base registers.
+ */
+ pci_bootloader_write_config_word(dev, PCI_COMMAND, cmd);
+
+	  /* Seems to be an unused entry; skip it. */
+ if ( ml == 0 || ml == 0xffffffff ) continue;
+
+ if ((l &
+ (PCI_BASE_ADDRESS_SPACE|PCI_BASE_ADDRESS_MEM_TYPE_MASK))
+ == (PCI_BASE_ADDRESS_MEM_TYPE_64
+ |PCI_BASE_ADDRESS_SPACE_MEMORY)) {
+ nextreg=reg+2;
+ }
+ dev->base_address[reg] = l;
+ r = salloc(sizeof(pci_resource));
+ if (!r) {
+ printk("Error allocating pci_resource struct.\n");
+ continue;
+ }
+ r->dev = dev;
+ r->reg = reg;
+ if ((l&PCI_BASE_ADDRESS_SPACE) == PCI_BASE_ADDRESS_SPACE_IO) {
+ r->type = l&~PCI_BASE_ADDRESS_IO_MASK;
+ r->base = l&PCI_BASE_ADDRESS_IO_MASK;
+ /* r->size = ~(ml&PCI_BASE_ADDRESS_IO_MASK)+1; */
+ } else {
+ r->type = l&~PCI_BASE_ADDRESS_MEM_MASK;
+ r->base = l&PCI_BASE_ADDRESS_MEM_MASK;
+ /* r->size = ~(ml&PCI_BASE_ADDRESS_MEM_MASK)+1; */
+ }
+
+	  /* Find the first bit set to one above the base
+	     address type bits; it gives the length of the region. */
+ {
+ unsigned int c= 16 , val= 0;
+ while( !(val= ml & c) ) c <<= 1;
+ r->size = val;
+ }
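+	  /* Equivalently (assuming the region is at least 16 bytes):
+	   * r->size = (ml & ~0xf) & ~((ml & ~0xf) - 1), i.e. the
+	   * lowest bit set in the readback once the type bits are
+	   * cleared.
+	   */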
+
+#ifdef PCI_DEBUG
+ printk(" readbase bus %d, (%04x:%04x), base %08x, size %08x, type %d\n",
+ r->dev->bus->number,
+ r->dev->vendor,
+ r->dev->device,
+ r->base,
+ r->size,
+ r->type );
+#endif
+
+ /* Check for the blacklisted entries */
+ insert_resource(r);
+ }
+}
+
+static u_int pci_scan_bus(struct pci_bus *bus)
+{
+ unsigned int devfn, max;
+ uint32_t class;
+ uint32_t l;
+ unsigned char irq, hdr_type, is_multi = 0;
+ struct pci_dev *dev, **bus_last;
+ struct pci_bus *child;
+
+#if 0
+ printk("scanning pci bus %d\n", bus->number );
+#endif
+
+ bus_last = &bus->devices;
+ max = bus->secondary;
+ for (devfn = 0; devfn < 0xff; ++devfn) {
+ if (PCI_FUNC(devfn) && !is_multi) {
+ /* not a multi-function device */
+ continue;
+ }
+ if (pcibios_read_config_byte(bus->number, devfn, PCI_HEADER_TYPE, &hdr_type))
+ continue;
+ if (!PCI_FUNC(devfn))
+ is_multi = hdr_type & 0x80;
+
+ if (pcibios_read_config_dword(bus->number, devfn, PCI_VENDOR_ID, &l) ||
+ /* some broken boards return 0 if a slot is empty: */
+ l == 0xffffffff || l == 0x00000000 || l == 0x0000ffff || l == 0xffff0000) {
+ is_multi = 0;
+ continue;
+ }
+
+ dev = salloc(sizeof(*dev));
+ dev->bus = bus;
+ dev->devfn = devfn;
+ dev->vendor = l & 0xffff;
+ dev->device = (l >> 16) & 0xffff;
+
+ pcibios_read_config_dword(bus->number, devfn,
+ PCI_CLASS_REVISION, &class);
+ class >>= 8; /* upper 3 bytes */
+ dev->class = class;
+ class >>= 8;
+ dev->hdr_type = hdr_type;
+
+ switch (hdr_type & 0x7f) { /* header type */
+ case PCI_HEADER_TYPE_NORMAL: /* standard header */
+ if (class == PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ /*
+ * If the card generates interrupts, read IRQ number
+ * (some architectures change it during pcibios_fixup())
+ */
+ pcibios_read_config_byte(bus->number, dev->devfn, PCI_INTERRUPT_PIN, &irq);
+ if (irq)
+ pcibios_read_config_byte(bus->number, dev->devfn, PCI_INTERRUPT_LINE, &irq);
+ dev->irq = irq;
+ /*
+ * read base address registers, again pcibios_fixup() can
+ * tweak these
+ */
+ pci_read_bases(dev, 6);
+ pcibios_read_config_dword(bus->number, devfn, PCI_ROM_ADDRESS, &l);
+ dev->rom_address = (l == 0xffffffff) ? 0 : l;
+ break;
+ case PCI_HEADER_TYPE_BRIDGE: /* bridge header */
+ if (class != PCI_CLASS_BRIDGE_PCI)
+ goto bad;
+ pci_read_bases(dev, 2);
+ pcibios_read_config_dword(bus->number, devfn, PCI_ROM_ADDRESS1, &l);
+ dev->rom_address = (l == 0xffffffff) ? 0 : l;
+ break;
+ case PCI_HEADER_TYPE_CARDBUS: /* CardBus bridge header */
+ if (class != PCI_CLASS_BRIDGE_CARDBUS)
+ goto bad;
+ pci_read_bases(dev, 1);
+ break;
+
+ default: /* unknown header */
+ bad:
+ printk("PCI device with unknown header type %d ignored.\n",
+ hdr_type&0x7f);
+ continue;
+ }
+
+ /*
+ * Put it into the global PCI device chain. It's used to
+ * find devices once everything is set up.
+ */
+ *pci->last_dev_p = dev;
+ pci->last_dev_p = &dev->next;
+
+ /*
+ * Now insert it into the list of devices held
+ * by the parent bus.
+ */
+ *bus_last = dev;
+ bus_last = &dev->sibling;
+
+ }
+
+ /*
+ * After performing arch-dependent fixup of the bus, look behind
+ * all PCI-to-PCI bridges on this bus.
+ */
+ for(dev=bus->devices; dev; dev=dev->sibling)
+ /*
+ * If it's a bridge, scan the bus behind it.
+ */
+ if ((dev->class >> 8) == PCI_CLASS_BRIDGE_PCI) {
+ uint32_t buses;
+ unsigned int devfn = dev->devfn;
+ unsigned short cr;
+
+ /*
+ * Insert it into the tree of buses.
+ */
+ child = salloc(sizeof(*child));
+ child->next = bus->children;
+ bus->children = child;
+ child->self = dev;
+ child->parent = bus;
+
+ /*
+ * Set up the primary, secondary and subordinate
+ * bus numbers.
+ */
+ child->number = child->secondary = ++max;
+ child->primary = bus->secondary;
+ child->subordinate = 0xff;
+ /*
+ * Clear all status bits and turn off memory,
+ * I/O and master enables.
+ */
+ pcibios_read_config_word(bus->number, devfn, PCI_COMMAND, &cr);
+ pcibios_write_config_word(bus->number, devfn, PCI_COMMAND, 0x0000);
+ pcibios_write_config_word(bus->number, devfn, PCI_STATUS, 0xffff);
+ /*
+ * Read the existing primary/secondary/subordinate bus
+ * number configuration to determine if the PCI bridge
+ * has already been configured by the system. If so,
+ * do not modify the configuration, merely note it.
+ */
+ pcibios_read_config_dword(bus->number, devfn, PCI_PRIMARY_BUS, &buses);
+ if ((buses & 0xFFFFFF) != 0)
+ {
+ unsigned int cmax;
+
+ child->primary = buses & 0xFF;
+ child->secondary = (buses >> 8) & 0xFF;
+ child->subordinate = (buses >> 16) & 0xFF;
+ child->number = child->secondary;
+ cmax = pci_scan_bus(child);
+ if (cmax > max) max = cmax;
+ }
+ else
+ {
+ /*
+ * Configure the bus numbers for this bridge:
+ */
+ buses &= 0xff000000;
+ buses |=
+ (((unsigned int)(child->primary) << 0) |
+ ((unsigned int)(child->secondary) << 8) |
+ ((unsigned int)(child->subordinate) << 16));
+ pcibios_write_config_dword(bus->number, devfn, PCI_PRIMARY_BUS, buses);
+ /*
+ * Now we can scan all subordinate buses:
+ */
+ max = pci_scan_bus(child);
+ /*
+ * Set the subordinate bus number to its real
+ * value:
+ */
+ child->subordinate = max;
+ buses = (buses & 0xff00ffff)
+ | ((unsigned int)(child->subordinate) << 16);
+ pcibios_write_config_dword(bus->number, devfn, PCI_PRIMARY_BUS, buses);
+ }
+ pcibios_write_config_word(bus->number, devfn, PCI_COMMAND, cr );
+ }
+
+ /*
+ * We've scanned the bus and so we know all about what's on
+ * the other side of any bridges that may be on this bus plus
+ * any devices.
+ *
+ * Return how far we've got finding sub-buses.
+ */
+ return max;
+}
+
+#if 0
+
+void
+pci_fixup(void)
+{
+ struct pci_dev *p;
+ struct pci_bus *bus;
+
+ for (bus = &pci_root; bus; bus=bus->next)
+ {
+ for (p=bus->devices; p; p=p->sibling)
+ {
+ }
+ }
+}
+
+static void print_pci_info()
+{
+ pci_resource *r;
+ struct pci_bus *pb = &pci_root;
+
+ printk("\n");
+ printk("PCI busses:\n");
+
+ for(pb= &pci_root; pb; pb=pb->children )
+ {
+ printk(" number %d, primary %d, secondary %d, subordinate %d\n",
+ pb->number,
+ pb->primary,
+ pb->secondary,
+ pb->subordinate );
+ printk(" bridge; vendor %04x, device %04x\n",
+ pb->self->vendor,
+ pb->self->device );
+
+ {
+ struct pci_dev *pd;
+
+ for(pd= pb->devices; pd; pd=pd->sibling )
+ {
+ printk(" vendor %04x, device %04x, irq %d\n",
+ pd->vendor,
+ pd->device,
+ pd->irq );
+
+ }
+ printk("\n");
+ }
+
+ }
+ printk("\n");
+
+ printk("PCI resources:\n");
+ for (r=pci->resources; r; r= r->next)
+ {
+ printk(" bus %d, vendor %04x, device %04x, base %08x, size %08x, type %d\n",
+ r->dev->bus->number,
+ r->dev->vendor,
+ r->dev->device,
+ r->base,
+ r->size,
+ r->type );
+ }
+ printk("\n");
+
+ return;
+}
+
+#endif
+
+static struct _addr_start
+{
+ uint32_t start_pcimem;
+ uint32_t start_pciio;
+ uint32_t start_prefetch;
+} astart;
+
+static pci_resource *enum_device_resources( struct pci_dev *pdev, int i )
+{
+ pci_resource *r;
+
+ for(r= pci->resources; r; r= r->next )
+ {
+ if( r->dev == pdev )
+ {
+ if( i-- == 0 ) break;
+ }
+ }
+ return r;
+}
+
+static void recursive_bus_reconfigure( struct pci_bus *pbus )
+{
+ struct pci_dev *pdev;
+ struct pci_bus *childbus;
+ int isroot = 0;
+
+ if( !pbus )
+ {
+ /* start with the root bus */
+ astart.start_pcimem = BUSREST_MEM_START;
+ astart.start_pciio = BUSREST_IO_START;
+ astart.start_prefetch = ((BUSREST_MEM_END >> 16) << 16);
+
+ pbus = &pci_root;
+ isroot = -1;
+ }
+
+#define WRITE_BRIDGE_IO
+#define WRITE_BRIDGE_MEM
+#define WRITE_BRIDGE_PF
+#define WRITE_BRIDGE_ENABLE
+
+/*
+** Run thru the p2p bridges on this bus and recurse into subordinate busses
+*/
+ for( childbus= pbus->children; childbus; childbus= childbus->next )
+ {
+ pdev= childbus->self;
+
+ pcibios_write_config_byte(pdev->bus->number, pdev->devfn, PCI_LATENCY_TIMER, 0x80 );
+ pcibios_write_config_byte(pdev->bus->number, pdev->devfn, PCI_SEC_LATENCY_TIMER, 0x80 );
+
+ {
+ struct _addr_start addrhold;
+ uint8_t base8, limit8;
+ uint16_t base16, limit16, ubase16, ulimit16;
+
+ /* save the base address values */
+ memcpy( &addrhold, &astart, sizeof(struct _addr_start));
+
+ recursive_bus_reconfigure( childbus );
+
+#ifdef PCI_DEBUG
+ printk("pci: configuring bus %d bridge (%04x:%04x), bus %d : (%d-%d)\n",
+ pdev->bus->number,
+ pdev->vendor,
+ pdev->device,
+ childbus->primary,
+ childbus->secondary,
+ childbus->subordinate );
+#endif
+
+ /*
+ * use the current values & the saved ones to figure out
+ * the address spaces for the bridge
+ */
+
+ if( addrhold.start_pciio == astart.start_pciio )
+ {
+ base8 = limit8 = 0xff;
+ ubase16 = ulimit16 = 0xffff;
+ }
+ else
+ {
+ base8 = (uint8_t) ((addrhold.start_pciio >> 8) & 0xf0);
+ ubase16 = (uint16_t)(addrhold.start_pciio >> 16);
+ limit8 = (uint8_t) ((astart.start_pciio >> 8 ) & 0xf0);
+ ulimit16 = (uint16_t)(astart.start_pciio >> 16);
+ astart.start_pciio += 0x1000;
+ }
+
+#ifdef PCI_DEBUG
+ printk("pci: io base %08x limit %08x\n", (base8<<8)+(ubase16<<16), (limit8<<8)+(ulimit16<<16));
+#endif
+#ifdef WRITE_BRIDGE_IO
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_IO_BASE_UPPER16, ubase16 );
+ pcibios_write_config_byte(pdev->bus->number, pdev->devfn, PCI_IO_BASE, base8 );
+
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_IO_LIMIT_UPPER16, ulimit16 );
+ pcibios_write_config_byte(pdev->bus->number, pdev->devfn, PCI_IO_LIMIT, limit8 );
+#endif
+
+ if( addrhold.start_pcimem == astart.start_pcimem )
+ {
+ limit16 = 0;
+ base16 = 0xffff;
+ }
+ else
+ {
+ limit16= (uint16_t)((astart.start_pcimem >> 16) & 0xfff0);
+ base16 = (uint16_t)((addrhold.start_pcimem >> 16) & 0xfff0);
+ astart.start_pcimem += 0x100000;
+ }
+#ifdef PCI_DEBUG
+ printk("pci: memory %04x, limit %04x\n", base16, limit16);
+#endif
+#ifdef WRITE_BRIDGE_MEM
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_MEMORY_BASE, base16 );
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_MEMORY_LIMIT, limit16 );
+#endif
+
+
+ if( astart.start_prefetch == addrhold.start_prefetch )
+ {
+ limit16 = 0;
+ base16 = 0xffff;
+ }
+ else
+ {
+ limit16= (uint16_t)((addrhold.start_prefetch >> 16) & 0xfff0);
+ base16 = (uint16_t)((astart.start_prefetch >> 16) & 0xfff0);
+ astart.start_prefetch -= 0x100000;
+ }
+#ifdef PCI_DEBUG
+ printk("pci: pf memory %04x, limit %04x\n", base16, limit16);
+#endif
+#ifdef WRITE_BRIDGE_PF
+ pcibios_write_config_dword(pdev->bus->number, pdev->devfn, PCI_PREF_BASE_UPPER32, 0);
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_PREF_MEMORY_BASE, base16 );
+ pcibios_write_config_dword(pdev->bus->number, pdev->devfn, PCI_PREF_LIMIT_UPPER32, 0);
+ pcibios_write_config_word(pdev->bus->number, pdev->devfn, PCI_PREF_MEMORY_LIMIT, limit16 );
+#endif
+
+#ifdef WRITE_BRIDGE_ENABLE
+ pcibios_write_config_word(pdev->bus->number,
+ pdev->devfn,
+ PCI_BRIDGE_CONTROL,
+ (uint16_t)( 0 ));
+
+ pcibios_write_config_word(pdev->bus->number,
+ pdev->devfn,
+ PCI_COMMAND,
+ (uint16_t)( PCI_COMMAND_IO |
+ PCI_COMMAND_MEMORY |
+ PCI_COMMAND_MASTER ));
+#endif
+ }
+ }
+
+ if( !isroot )
+ {
+#ifdef PCI_DEBUG
+ printk("pci: Configuring devices on bus %d\n", pbus->number);
+#endif
+ /*
+ ** Run thru this bus and set up addresses for all the non-bridge devices
+ */
+ for( pdev = pbus->devices; pdev; pdev= pdev->sibling )
+ {
+ if( (pdev->class >> 8) != PCI_CLASS_BRIDGE_PCI )
+ {
+ pci_resource *r;
+ int i = 0;
+ unsigned alloc;
+
+ /* enumerate all the resources defined by this device & reserve space
+ ** for each of their defined regions.
+ */
+
+#ifdef PCI_DEBUG
+ printk("pci: configuring; vendor %04x, device %04x\n", pdev->vendor, pdev->device );
+#endif
+
+ while( (r= enum_device_resources( pdev, i++ )) )
+ {
+ /*
+ ** Force all memory spaces to be non-prefetchable because
+ ** on the pci bus, byte-wise reads against prefetchable
+ ** memory are applied as 32 bit reads, which is a pain
+ ** when you're trying to talk to hardware. This is a
+ ** little sub-optimal because the algorithm doesn't sort
+				** the address regions to pack them in; OTOH, perhaps it's
+ ** not so bad because the inefficient packing will help
+ ** avoid buffer overflow/underflow problems.
+ */
+#if 0
+ if( (r->type & PCI_BASE_ADDRESS_MEM_PREFETCH) )
+ {
+ /* prefetchable space */
+
+ /* shift base pointer down to an integer multiple of the size of the desired region */
+ astart.start_prefetch -= (alloc= ((r->size / PAGE_SIZE) + 1) * PAGE_SIZE);
+ /* shift base pointer down to an integer multiple of the size of the desired region */
+ astart.start_prefetch = (astart.start_prefetch / r->size) * r->size;
+
+ r->base = astart.start_prefetch;
+#ifdef PCI_DEBUG
+ printk("pci: pf %08X, size %08X, alloc %08X\n", r->base, r->size, alloc );
+#endif
+ }
+#endif
+ if( r->type & PCI_BASE_ADDRESS_SPACE_IO )
+ {
+ /* io space */
+
+ /* shift base pointer up to an integer multiple of the size of the desired region */
+ if( astart.start_pciio % r->size )
+ astart.start_pciio = (((astart.start_pciio / r->size) + 1) * r->size);
+
+ r->base = astart.start_pciio;
+ astart.start_pciio += (alloc= ((r->size / PAGE_SIZE) + 1) * PAGE_SIZE);
+#ifdef PCI_DEBUG
+ printk("pci: io %08X, size %08X, alloc %08X\n", r->base, r->size, alloc );
+#endif
+ }
+ else
+ {
+ /* memory space */
+
+ /* shift base pointer up to an integer multiple of the size of the desired region */
+ if( astart.start_pcimem % r->size )
+ astart.start_pcimem = (((astart.start_pcimem / r->size) + 1) * r->size);
+
+ r->base = astart.start_pcimem;
+ astart.start_pcimem += (alloc= ((r->size / PAGE_SIZE) + 1) * PAGE_SIZE);
+#ifdef PCI_DEBUG
+ printk("pci: mem %08X, size %08X, alloc %08X\n", r->base, r->size, alloc );
+#endif
+ }
+ }
+
+ }
+ }
+ }
+
+}
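+
+/* A sketch of the bridge window encoding used above (values are
+ * illustrative): for an I/O window starting at 0x20000,
+ * base8 = (0x20000 >> 8) & 0xf0 = 0x00 and ubase16 = 0x0002; the
+ * bridge recombines them as (ubase16 << 16) | (base8 << 8) =
+ * 0x20000. Memory and prefetch windows carry address bits 31..20
+ * in bits 15..4 of a 16-bit register, hence the ">> 16) & 0xfff0"
+ * extraction and the 1 MB (0x100000) allocation granularity.
+ */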
+
+void pci_init(void)
+{
+ PPC_DEVICE *hostbridge;
+
+ if (pci->last_dev_p) {
+ printk("Two or more calls to pci_init!\n");
+ return;
+ }
+ pci->last_dev_p = &(bd->pci_devices);
+ hostbridge=residual_find_device(PROCESSORDEVICE, NULL,
+ BridgeController,
+ PCIBridge, -1, 0);
+ if (hostbridge) {
+ if (hostbridge->DeviceId.Interface==PCIBridgeIndirect) {
+ bd->pci_functions=&indirect_functions;
+ /* Should be extracted from residual data,
+ * indeed MPC106 in CHRP mode is different,
+ * but we should not use residual data in
+ * this case anyway.
+ */
+ pci->config_addr = ((volatile void *)
+ (ptr_mem_map->io_base+0xcf8));
+ pci->config_data = ptr_mem_map->io_base+0xcfc;
+ } else if(hostbridge->DeviceId.Interface==PCIBridgeDirect) {
+ bd->pci_functions=&direct_functions;
+ pci->config_data=(u_char *) 0x80800000;
+ } else {
+ }
+ } else {
+ /* Let us try by experimentation at our own risk! */
+ uint32_t id0;
+ bd->pci_functions = &direct_functions;
+		/* On all direct bridges I know of, the host bridge itself
+ * appears as device 0 function 0.
+ */
+ pcibios_read_config_dword(0, 0, PCI_VENDOR_ID, &id0);
+ if (id0==~0U) {
+ bd->pci_functions = &indirect_functions;
+ pci->config_addr = ((volatile u_int *)
+ (ptr_mem_map->io_base+0xcf8));
+ pci->config_data = ptr_mem_map->io_base+0xcfc;
+ }
+		/* Here we should check that the host bridge is actually
+		 * present, but if it is not, we are in such a desperate
+		 * situation that we probably can't even report it.
+		 */
+ }
+ /* Now build a small database of all found PCI devices */
+ printk("\nPCI: Probing PCI hardware\n");
+ pci_root.subordinate=pci_scan_bus(&pci_root);
+
+ print_pci_resources("Installed PCI resources:\n");
+
+ recursive_bus_reconfigure(NULL);
+
+ reconfigure_pci();
+
+ print_pci_resources("Allocated PCI resources:\n");
+
+#if 0
+ print_pci_info();
+#endif
+}
+
+/* eof */
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/pci.h b/bsps/powerpc/motorola_powerpc/bootloader/pci.h
new file mode 100644
index 0000000000..4281a13090
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/pci.h
@@ -0,0 +1,95 @@
+/*
+ * PCI defines and function prototypes
+ * Copyright 1994, Drew Eckhardt
+ * Copyright 1997, 1998 Martin Mares <mj@atrey.karlin.mff.cuni.cz>
+ *
+ * For more information, please consult the following manuals (look at
+ * http://www.pcisig.com/ for how to get them):
+ *
+ * PCI BIOS Specification
+ * PCI Local Bus Specification
+ * PCI to PCI Bridge Specification
+ * PCI System Design Guide
+ */
+
+#ifndef BOOTLOADER_PCI_H
+#define BOOTLOADER_PCI_H
+
+#include <rtems/pci.h>
+
+
+/* Functions used to access pci configuration space */
+struct pci_bootloader_config_access_functions {
+ int (*read_config_byte)(unsigned char, unsigned char,
+ unsigned char, uint8_t *);
+ int (*read_config_word)(unsigned char, unsigned char,
+ unsigned char, uint16_t *);
+ int (*read_config_dword)(unsigned char, unsigned char,
+ unsigned char, uint32_t *);
+ int (*write_config_byte)(unsigned char, unsigned char,
+ unsigned char, uint8_t);
+ int (*write_config_word)(unsigned char, unsigned char,
+ unsigned char, uint16_t);
+ int (*write_config_dword)(unsigned char, unsigned char,
+ unsigned char, uint32_t);
+};
+
+/*
+ * There is one pci_dev structure for each slot-number/function-number
+ * combination:
+ */
+struct pci_dev {
+ struct pci_bus *bus; /* bus this device is on */
+ struct pci_dev *sibling; /* next device on this bus */
+ struct pci_dev *next; /* chain of all devices */
+
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procent; /* device entry in /proc/bus/pci */
+
+ unsigned int devfn; /* encoded device & function index */
+ unsigned short vendor;
+ unsigned short device;
+ unsigned int class; /* 3 bytes: (base,sub,prog-if) */
+ unsigned int hdr_type; /* PCI header type */
+ unsigned int master : 1; /* set if device is master capable */
+ /*
+ * In theory, the irq level can be read from configuration
+ * space and all would be fine. However, old PCI chips don't
+ * support these registers and return 0 instead. For example,
+ * the Vision864-P rev 0 chip uses INTA, but returns 0 in
+ * the interrupt line and pin registers. pci_init()
+ * initializes this field with the value at PCI_INTERRUPT_LINE
+ * and it is the job of pcibios_fixup() to change it if
+ * necessary. The field must not be 0 unless the device
+ * cannot generate interrupts at all.
+ */
+ unsigned int irq; /* irq generated by this device */
+
+ /* Base registers for this device, can be adjusted by
+ * pcibios_fixup() as necessary.
+ */
+ unsigned long base_address[6];
+ unsigned long rom_address;
+};
+
+struct pci_bus {
+ struct pci_bus *parent; /* parent bus this bridge is on */
+ struct pci_bus *children; /* chain of P2P bridges on this bus */
+ struct pci_bus *next; /* chain of all PCI buses */
+
+ struct pci_dev *self; /* bridge device as seen by parent */
+ struct pci_dev *devices; /* devices behind this bridge */
+
+ void *sysdata; /* hook for sys-specific extension */
+ struct proc_dir_entry *procdir; /* directory entry in /proc/bus/pci */
+
+ unsigned char number; /* bus number */
+ unsigned char primary; /* number of primary bridge */
+ unsigned char secondary; /* number of secondary bridge */
+ unsigned char subordinate; /* max number of subordinate buses */
+};
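+
+/* The two chains support two traversals (a sketch; in this
+ * bootloader the all-devices list is rooted at bd->pci_devices,
+ * cf. pci.c):
+ *
+ *   for (dev = bd->pci_devices; dev; dev = dev->next)
+ *       ... every device in the system ...
+ *   for (dev = bus->devices; dev; dev = dev->sibling)
+ *       ... only devices on one bus ...
+ */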
+
+extern struct pci_bus pci_root; /* root bus */
+extern struct pci_dev *pci_devices; /* list of all devices */
+
+#endif /* BOOTLOADER_PCI_H */
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/ppcboot.lds b/bsps/powerpc/motorola_powerpc/bootloader/ppcboot.lds
new file mode 100644
index 0000000000..b47e01f172
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/ppcboot.lds
@@ -0,0 +1,96 @@
+OUTPUT_ARCH(powerpc)
+OUTPUT_FORMAT ("elf32-powerpc", "elf32-powerpc", "elf32-powerpc")
+/* Do we need any of these for elf?
+ __DYNAMIC = 0; */
+SECTIONS
+{
+ .text :
+ {
+ /* We have to build the header by hand, painful since ppcboot
+ format support is very poor in binutils.
+ objdump -b ppcboot zImage --all-headers can be used to check. */
+ /* The following line can be added as a branch to use the same image
+ * for netboot as for prepboots, the only problem is that objdump
+   * did not in this case recognize the format since it insisted
+   * on checking that the x86 code area held only zeroes.
+ */
+ LONG(0x48000000+start);
+ . = 0x1be; BYTE(0x80); BYTE(0)
+ BYTE(2); BYTE(0); BYTE(0x41); BYTE(1);
+ BYTE(0x12); BYTE(0x4f); LONG(0);
+ BYTE(((_edata + 0x1ff)>>9)&0xff);
+ BYTE(((_edata + 0x1ff)>>17)&0xff);
+ BYTE(((_edata + 0x1ff)>>25)&0xff);
+ . = 0x1fe;
+ BYTE(0x55);
+ BYTE(0xaa);
+ BYTE(start&0xff);
+ BYTE((start>>8)&0xff);
+ BYTE((start>>16)&0xff);
+ BYTE((start>>24)&0xff);
+ BYTE(_edata&0xff);
+ BYTE((_edata>>8)&0xff);
+ BYTE((_edata>>16)&0xff);
+ BYTE((_edata>>24)&0xff);
+ BYTE(0); /* flags */
+ BYTE(0); /* os_id */
+ BYTE(0x4C); BYTE(0x69); BYTE(0x6e);
+ BYTE(0x75); BYTE(0x78); /* Partition name */
+ . = 0x400;
+ *(.text)
+ *(.sdata2)
+ *(.rodata)
+ *(.rodata*)
+ }
+/* . = ALIGN(16); */
+ .image :
+ {
+ rtems.gz(*)
+ . = ALIGN(4);
+ *.gz(*)
+ }
+ /* Read-write section, merged into data segment: */
+ /* . = ALIGN(4096); */
+ .reloc :
+ {
+ *(.got)
+ _GOT2_TABLE_ = .;
+ *(.got2)
+ _FIXUP_TABLE_ = .;
+ *(.fixup)
+ }
+
+ __got2_entries = (_FIXUP_TABLE_ - _GOT2_TABLE_) >>2;
+ __fixup_entries = (. - _FIXUP_TABLE_)>>2;
+
+ .handlers :
+ {
+ *(.exception)
+ }
+
+ .data :
+ {
+ *(.data)
+ *(.data*)
+ *(.sdata)
+ . = ALIGN(4);
+ _edata = .;
+ }
+ PROVIDE(_binary_initrd_gz_start = 0);
+ PROVIDE(_binary_initrd_gz_end = 0);
+ _rtems_gz_size = _binary_rtems_gz_end - _binary_rtems_gz_start;
+ _rtems_size = __rtems_end - __rtems_start;
+ .bss :
+ {
+ *(.sbss)
+ *(.bss)
+ . = ALIGN(4);
+ }
+ __bss_words = SIZEOF(.bss)>>2;
+ __size = . ;
+ /DISCARD/ :
+ {
+ *(.comment)
+ }
+}
+
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakeres.c b/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakeres.c
new file mode 100644
index 0000000000..51f03a070f
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakeres.c
@@ -0,0 +1,226 @@
+#include <bsp/residual.h>
+#include <stdint.h>
+
+/* Magic knowledge - qemu loads the image here.
+ * However, we use the value from NVRAM if possible...
+ */
+#define KERNELBASE 0x01000000
+
+/* When starting qemu make sure to give the correct
+ * amount of memory!
+ *
+ * NOTE: Code now supports reading the actual memory
+ * amount from NVRAM. The residual copy in RAM
+ * is fixed-up accordingly.
+ */
+#define MEM_MEGS 32
+
+/* Mock up a minimal/fake residual; just enough to make the
+ * bootloader happy.
+ */
+struct fake_data {
+ unsigned long dat_len;
+ unsigned long res_off;
+ unsigned long cmd_off;
+ unsigned long cmd_len;
+ unsigned long img_adr;
+ RESIDUAL fake_residual;
+ char cmdline[1024];
+} fake_data = {
+dat_len: sizeof(fake_data),
+res_off: (unsigned long) &fake_data.fake_residual
+ -(unsigned long) &fake_data,
+cmd_off: (unsigned long) &fake_data.cmdline
+ -(unsigned long) &fake_data,
+cmd_len: sizeof(fake_data.cmdline),
+img_adr: KERNELBASE,
+fake_residual:
+{
+ ResidualLength: sizeof(RESIDUAL),
+ Version: 0,
+ Revision: 0,
+ EC: 0,
+ VitalProductData: {
+ FirmwareSupplier: QEMU,
+ ProcessorHz: 300000000, /* fantasy */
+ ProcessorBusHz: 100000000, /* qemu timebase clock */
+ TimeBaseDivisor:1*1000,
+ },
+ MaxNumCpus: 1,
+ ActualNumCpus: 1,
+ Cpus: {
+ {
+ CpuType: 0x00040103, /* FIXME: fill from PVR */
+ CpuNumber: 0,
+ CpuState: 0,
+ },
+ },
+ /* Memory */
+ TotalMemory: 1024*1024*MEM_MEGS,
+ GoodMemory: 1024*1024*MEM_MEGS,
+ ActualNumMemSegs: 13,
+ Segs: {
+ { 0x2000, 0xFFF00, 0x00100 },
+ { 0x0020, MEM_MEGS*0x100, 0x80000 - MEM_MEGS*0x100 },
+ { 0x0008, 0x00800, 0x00168 },
+ { 0x0004, 0x00000, 0x00005 },
+ { 0x0001, 0x006F1, 0x0010F },
+ { 0x0002, 0x006AD, 0x00044 },
+ { 0x0010, 0x00005, 0x006A8 },
+ { 0x0010, 0x00968, MEM_MEGS*0x100 - 0x00968 },
+ { 0x0800, 0xC0000, 0x3F000 },
+ { 0x0600, 0xBF800, 0x00800 },
+ { 0x0500, 0x81000, 0x3E800 },
+ { 0x0480, 0x80800, 0x00800 },
+ { 0x0440, 0x80000, 0x00800 }
+ },
+ ActualNumMemories: 0,
+ Memories: {
+ {0},
+ },
+ /* Devices */
+ ActualNumDevices: 1,
+ Devices: {
+ {
+ DeviceId: {
+ BusId: PROCESSORDEVICE,
+ BaseType: BridgeController,
+ SubType: PCIBridge,
+ Interface: PCIBridgeIndirect,
+ },
+ }
+ },
+ DevicePnPHeap: {0}
+},
+/* This is overwritten by the command line passed by qemu. */
+cmdline: {
+ '-','-','n','e','2','k','-','i','r','q','=','9',
+ 0,
+}
+};
+
+/* Read one byte from NVRAM */
+static inline uint8_t
+nvram_rd(void)
+{
+uint8_t rval = *(volatile uint8_t*)0x80000077;
+ asm volatile("eieio");
+ return rval;
+}
+
+/* Set NVRAM address pointer */
+static inline void
+nvram_addr(uint16_t addr)
+{
+ *(volatile uint8_t*)0x80000074 = (addr & 0xff);
+ asm volatile("eieio");
+ *(volatile uint8_t*)0x80000075 = ((addr>>8) & 0xff);
+ asm volatile("eieio");
+}
+
+/* Read a 32-bit (big-endian) word from NVRAM */
+static uint32_t
+nvram_rdl_be(uint16_t addr)
+{
+int i;
+uint32_t rval = 0;
+ for ( i=0; i<sizeof(rval); i++ ) {
+ nvram_addr( addr + i );
+ rval = (rval<<8) | nvram_rd();
+ }
+ return rval;
+}
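+
+/* NVRAM layout assumed by res_copy() below (big-endian values):
+ *   0x0000  magic 'QEMU'
+ *   0x0030  total memory size in bytes
+ *   0x0038  kernel image load address
+ *   0x003c  kernel image size
+ *   0x0040  command-line address
+ *   0x0044  command-line length
+ */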
+
+
+/* !!! NOTE !!!
+ *
+ * We use a special hack to propagate command-line info to the bootloader.
+ * This is NOT PreP compliant (but who cares).
+ * We set R6 and R7 to point to the start/end of the command line string
+ * and hacked the bootloader so it uses R6/R7 (provided that the firmware
+ * is detected as 'QEMU').
+ *
+ * (see bootloader/mm.c, bootloader/misc.c, bootloader/bootldr.h, -- boot_data.cmd_line[])
+ */
+uint32_t
+res_copy(void)
+{
+struct fake_data *p;
+uint32_t addr, cmdl, l, imga;
+uint32_t mem_sz, pgs;
+int i;
+int have_nvram;
+
+ /* Make sure we have a valid NVRAM -- just check for 'QEMU' at the
+ * beginning
+ */
+ have_nvram = ( (('Q'<<24) | ('E'<<16) | ('M'<< 8) | ('U'<< 0)) == nvram_rdl_be( 0x0000 ) );
+
+ if ( !have_nvram ) {
+ /* reading NVRAM failed - fall back to using the static residual copy;
+ * this means no support for variable memory size or 'true' command line.
+ */
+ return (uint32_t)&fake_data;
+ }
+
+ /* Dilemma - we don't really know where to put the residual copy
+ * (original is in ROM and cannot be modified).
+ * We can't put it at the top of memory since the bootloader starts
+ * allocating memory from there before it saves the residual.
+ * Too close to the final image might not work either because RTEMS
+ * zeroes its BSS *before* making its copies of the residual and commandline.
+ *
+ * For now we hope that appending to the kernel image works (and that
+ * the bootloader puts it somewhere safe).
+ */
+ imga = nvram_rdl_be( 0x0038 );
+ addr = imga + nvram_rdl_be( 0x003c );
+ addr += 0x1f;
+ addr &= ~(0x1f);
+
+ p = (struct fake_data *)addr;
+
+ /* commandline + length from NVRAM */
+ cmdl = nvram_rdl_be( 0x0040 );
+ l = nvram_rdl_be( 0x0044 );
+
+ if ( l > 0 ) {
+ /* have a command-line; copy it into our local buffer */
+ if ( l > sizeof( p->cmdline ) - 1 ) {
+ l = sizeof( p->cmdline ) - 1;
+ }
+ /* original may overlap our buffer; must safely move around */
+ if ( p->cmdline < (char*)cmdl ) {
+ for ( i=0; i<l; i++ ) {
+ p->cmdline[i] = ((char*)cmdl)[i];
+ }
+ } else {
+ for ( i=l-1; i>=0; i-- ) {
+ p->cmdline[i] = ((char*)cmdl)[i];
+ }
+ }
+ }
+ p->cmdline[l] = 0;
+ /* Copy rest of residual */
+ for ( i=0; i<sizeof(p->fake_residual); i++ )
+ ((char*)&p->fake_residual)[i] = ((char*)&fake_data.fake_residual)[i];
+ p->dat_len = fake_data.dat_len;
+ p->res_off = fake_data.res_off;
+ p->cmd_off = fake_data.cmd_off;
+ p->cmd_len = l+1;
+ p->img_adr = imga;
+
+ /* Fix up memory in residual from NVRAM settings */
+
+ mem_sz = nvram_rdl_be( 0x0030 );
+ pgs = mem_sz >> 12;
+
+ p->fake_residual.TotalMemory = mem_sz;
+ p->fake_residual.GoodMemory = mem_sz;
+
+ p->fake_residual.Segs[1].BasePage = pgs;
+ p->fake_residual.Segs[1].PageCount = 0x80000 - pgs;
+ p->fake_residual.Segs[7].PageCount = pgs - 0x00968;
+
+ return (uint32_t)p;
+}
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakerom.S b/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakerom.S
new file mode 100644
index 0000000000..b77c3bd138
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/qemu_fakerom.S
@@ -0,0 +1,217 @@
+/* A fake 'bios' which does nothing but move a kernel image
+ * to RAM address zero and then starts that...
+ */
+
+#include <bsp/residual.h>
+
+#define LD_CACHE_LINE_SIZE 5
+#define INIT_STACK (0x100 - 16) /* 16-byte/svr4 aligned */
+
+/* These offsets must correspond to declaration in qemu_fakeres.c */
+#define DAT_LEN 0
+#define RES_OFF 4
+#define CMD_OFF 8
+#define CMD_LEN 12
+#define IMG_ADR 16
+
+/* Non-volatile registers */
+#define OBASE 30
+#define PCID 25
+#define PCIA 26
+
+#define PCI_MAX_DEV 32
+
+#define BA_OPCODE(tgt) ((18<<(31-5)) | 2 | ((tgt) & 0x03fffffc))
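+
+/* BA_OPCODE(tgt) hand-assembles an absolute branch: 18 is the
+ * PowerPC "b" major opcode (bits 0..5), the "| 2" sets the AA
+ * (absolute addressing) bit, and tgt fills the 24-bit target field.
+ * E.g. BA_OPCODE(0xfff00000) encodes "ba 0xfff00000" (the target
+ * field is sign-extended, giving a high-memory address).
+ */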
+
+ .global fake_data
+ .global res_set_memsz
+
+ .global _start
+_start:
+ lis 1, INIT_STACK@h
+ ori 1,1,INIT_STACK@l
+
+ /* qemu 0.14.1 has the wrong exception prefix for 74xx CPUs
+ * (bug 811683). Work around this by putting a stub at 0x00000X00
+ * which simply jumps to high memory. We only need the SC exception
+ * for now.
+ */
+ lis 3, BA_OPCODE(0xfff00000)@h
+ ori 3, 3, BA_OPCODE(0xfff00000)@l
+ li 4, 0x0c00
+ add 3, 3, 4
+ stw 3, 0(4)
+ dcbf 0, 4
+ icbi 0, 4
+
+ bl pci_irq_set
+ /* copy residual to RAM and fix up;
+ * this routine returns a pointer to
+ * a 'fake_data' struct. If reading
+ * NVRAM failed then the return value
+ * points to a fall-back version in
+ * ROM...
+ */
+ bl res_copy
+ /* fake_data pointer to R29 */
+ mr 29, 3
+
+ /* Load up R3..R5 with PreP mandated
+ * values (R3: residual, R4: kernel image,
+ * R5: OpenFirmware PTR (or NULL).
+ */
+
+ /* load R3 with residual pointer */
+ lwz 3, RES_OFF(29)
+ add 3, 3, 29
+
+ /* load R4 with image address */
+ lwz 4, IMG_ADR(29)
+
+ /* load R5 with zero (OFW = NULL) */
+ li 5, 0
+ /* EXTENSION: R6 = cmdline start */
+ lwz 6, CMD_OFF(29)
+ add 6, 6, 29
+ /* EXTENSION: R7 = cmdline end */
+ lwz 7, CMD_LEN(29)
+ add 7, 7, 6
+
+ /* jump to image address */
+ mtctr 4
+ bctr
+
+ .org 0x100
+ b _start
+
+ .org 0x110
+template:
+ mfsrr0 30
+ mfsrr1 31
+1: b 1b
+template_end:
+
+ .org 0xc00
+ b monitor
+
+
+ .org 0x4000
+codemove: /* src/dst are cache-aligned */
+ addi 5,5,(1<<LD_CACHE_LINE_SIZE)-1
+ srwi 5,5,LD_CACHE_LINE_SIZE
+ addi 3,3,-4
+ addi 4,4,-4
+1:
+ li 0, (1<<LD_CACHE_LINE_SIZE)
+ mtctr 0
+2:
+ lwzu 0, 4(3)
+ stwu 0, 4(4)
+ bdnz 2b
+ dcbf 0,4
+ icbi 0,4
+ addic. 5,5,-1
+ bne 1b
+ blr
+
+cpexc:
+ lis 3,template@h
+ ori 3,3,template@l
+ li 5,template_end-template
+ b codemove
+
+monitor:
+ stwu 1,-16(1)
+ stw OBASE, 8(1)
+ lis OBASE, 0x80000000@h
+ cmplwi 10,0x63 /* enter_monitor -> RESET */
+ bne 10f
+hwreset:
+ li 3,1
+ stb 3,0x92(OBASE)
+1: b 1b
+10: cmplwi 10,0x1d /* .NETCTRL -> ignore */
+ bne 10f
+ b ret_from_mon
+10: b hwreset /* unknown -> RESET */
+
+ret_from_mon:
+ lwz OBASE,8(1)
+ lwz 1,0(1)
+ rfi
+
+rcb:
+ stwbrx 3, 0, PCIA
+ lbzx 3, 0, PCID
+ blr
+
+wcb:
+ stwbrx 3, 0, PCIA
+ stbx 4, 0, PCID
+ blr
+
+rcd:
+ stwbrx 3, 0, PCIA
+ lwbrx 3, 0, PCID
+ blr
+
+/* fixup pci interrupt line register according to what
+ * qemu does: line = ((pin-1) + slot_no) & 1 ? 11 : 9;
+ */
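+/* In C, the routine below is roughly (a sketch; the helper names
+ * are illustrative, the real code uses the rcb/rcd/wcb accessors
+ * above with a pre-built config-space key in r20):
+ *
+ *   for (slot = 0; slot < PCI_MAX_DEV; slot++) {
+ *       if (cfg_read_dword(slot, 0x00) == 0xffffffff)
+ *           continue;                          -- empty slot
+ *       pin = cfg_read_byte(slot, 0x3d);
+ *       if (pin == 0)
+ *           continue;                          -- no interrupts
+ *       cfg_write_byte(slot, 0x3c,
+ *                      ((pin - 1) + slot) & 1 ? 11 : 9);
+ *   }
+ */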
+pci_irq_set:
+ /* set up stack frame */
+ stwu 1, -32(1)
+ mflr 0
+ stw 0, 32+4(1)
+ /* load counter with # of PCI devs */
+ li 0, PCI_MAX_DEV
+ mtctr 0
+ /* save non-volatile registers we use
+ * in stack frame
+ */
+ stw 20, 8(1)
+ stw PCIA, 12(1)
+ stw PCID, 16(1)
+ /* load non-volatile registers with
+ * intended values.
+ */
+ lis 20, 0x80000000@h /* key for slot # 0 */
+ lis PCIA, 0x80000cf8@h /* PCI config space address reg */
+ ori PCIA, PCIA, 0x80000cf8@l
+ addi PCID, PCIA, 4 /* PCI config space data reg */
+
+ /* loop over all slots and fix up PCI IRQ LINE */
+1:
+ mr 3, 20
+ bl rcd
+ addi 3, 3, 1
+ cmplwi 3, 0 /* slot empty (= -1 + 1 = 0) ? */
+ beq 2f
+ addi 3, 20, 0x3d
+ bl rcb
+ cmplwi 3, 0
+ beq 2f
+ slwi 4, 3, 11
+ addi 3, 20, 0x3c
+ xor 4, 4, 3 /* bit 11 = slot # + irq_num [zero-based] + 1 */
+ andi. 4, 4, 0x0800
+ li 4, 11
+ beq 3f
+ li 4, 9
+3:
+ bl wcb
+2:
+ addi 20, 20, 0x0800 /* next slot */
+ bdnz 1b
+
+ /* restore and return */
+ lwz 20, 32+4(1)
+ mtlr 20
+ lwz PCID, 16(1)
+ lwz PCIA, 12(1)
+ lwz 20, 8(1)
+ lwz 1, 0(1)
+ blr
+
+ .section .romentry, "ax"
+ b _start
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/zlib.c b/bsps/powerpc/motorola_powerpc/bootloader/zlib.c
new file mode 100644
index 0000000000..9d7efe6afc
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/zlib.c
@@ -0,0 +1,2102 @@
+/*
+ * This file is derived from various .h and .c files from the zlib-0.95
+ * distribution by Jean-loup Gailly and Mark Adler, with some additions
+ * by Paul Mackerras to aid in implementing Deflate compression and
+ * decompression for PPP packets. See zlib.h for conditions of
+ * distribution and use.
+ *
+ * Changes that have been made include:
+ * - changed functions not used outside this file to "local"
+ * - added minCompression parameter to deflateInit2
+ * - added Z_PACKET_FLUSH (see zlib.h for details)
+ * - added inflateIncomp
+ */
+
+/*+++++*/
+/* zutil.h -- internal interface and configuration of the compression library
+ * Copyright (C) 1995 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* From: zutil.h,v 1.9 1995/05/03 17:27:12 jloup Exp */
+
+#define _Z_UTIL_H
+
+#include "zlib.h"
+
+#ifndef local
+# define local static
+#endif
+/* compile with -Dlocal if your debugger can't find static symbols */
+
+#define FAR
+
+typedef unsigned char uch;
+typedef uch FAR uchf;
+typedef unsigned short ush;
+typedef ush FAR ushf;
+typedef unsigned long ulg;
+
+extern char *z_errmsg[]; /* indexed by 1-zlib_error */
+
+#define ERR_RETURN(strm,err) return (strm->msg=z_errmsg[1-err], err)
+/* To be used only when the state is known to be valid */
+
+#ifndef NULL
+#define NULL ((void *) 0)
+#endif
+
+ /* common constants */
+
+#define DEFLATED 8
+
+#ifndef DEF_WBITS
+# define DEF_WBITS MAX_WBITS
+#endif
+/* default windowBits for decompression. MAX_WBITS is for compression only */
+
+#if MAX_MEM_LEVEL >= 8
+# define DEF_MEM_LEVEL 8
+#else
+# define DEF_MEM_LEVEL MAX_MEM_LEVEL
+#endif
+/* default memLevel */
+
+#define STORED_BLOCK 0
+#define STATIC_TREES 1
+#define DYN_TREES 2
+/* The three kinds of block type */
+
+#define MIN_MATCH 3
+#define MAX_MATCH 258
+/* The minimum and maximum match lengths */
+
+ /* functions */
+
+#include <string.h>
+#define zmemcpy memcpy
+#define zmemzero(dest, len) memset(dest, 0, len)
+
+/* Diagnostic functions */
+#ifdef DEBUG_ZLIB
+# include <stdio.h>
+# ifndef verbose
+# define verbose 0
+# endif
+# define Assert(cond, msg) {if(!(cond)) Trace(msg);}
+# define Trace(x) printk(x)
+# define Tracev(x) {if (verbose) printk x ;}
+# define Tracevv(x) {if (verbose>1) printk x ;}
+# define Tracec(c,x) {if (verbose && (c)) printk x ;}
+# define Tracecv(c,x) {if (verbose>1 && (c)) printk x ;}
+#else
+# define Assert(cond,msg)
+# define Trace(x)
+# define Tracev(x)
+# define Tracevv(x)
+# define Tracec(c,x)
+# define Tracecv(c,x)
+#endif
+
+typedef uLong (*check_func) OF((uLong check, Bytef *buf, uInt len));
+
+/* voidpf zcalloc OF((voidpf opaque, unsigned items, unsigned size)); */
+/* void zcfree OF((voidpf opaque, voidpf ptr)); */
+
+#define ZALLOC(strm, items, size) \
+ (*((strm)->zalloc))((strm)->opaque, (items), (size))
+#define ZFREE(strm, addr, size) \
+ (*((strm)->zfree))((strm)->opaque, (voidpf)(addr), (size))
+#define TRY_FREE(s, p, n) {if (p) ZFREE(s, p, n);}
+
+/* deflate.h -- internal compression state
+ * Copyright (C) 1995 Jean-loup Gailly
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/*+++++*/
+/* infblock.h -- header to use infblock.c
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_blocks_state;
+typedef struct inflate_blocks_state FAR inflate_blocks_statef;
+
+local inflate_blocks_statef * inflate_blocks_new OF((
+ z_stream *z,
+ check_func c, /* check function */
+ uInt w)); /* window size */
+
+local int inflate_blocks OF((
+ inflate_blocks_statef *,
+ z_stream *,
+ int)); /* initial return code */
+
+local void inflate_blocks_reset OF((
+ inflate_blocks_statef *,
+ z_stream *,
+ uLongf *)); /* check value on output */
+
+local int inflate_blocks_free OF((
+ inflate_blocks_statef *,
+ z_stream *,
+ uLongf *)); /* check value on output */
+
+local int inflate_addhistory OF((
+ inflate_blocks_statef *,
+ z_stream *));
+
+local int inflate_packet_flush OF((
+ inflate_blocks_statef *));
+
+/*+++++*/
+/* inftrees.h -- header to use inftrees.c
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* Huffman code lookup table entry--this entry is four bytes for machines
+ that have 16-bit pointers (e.g. PC's in the small or medium model). */
+
+typedef struct inflate_huft_s FAR inflate_huft;
+
+struct inflate_huft_s {
+ union {
+ struct {
+ Byte Exop; /* number of extra bits or operation */
+ Byte Bits; /* number of bits in this code or subcode */
+ } what;
+ uInt Nalloc; /* number of these allocated here */
+ Bytef *pad; /* pad structure to a power of 2 (4 bytes for */
+ } word; /* 16-bit, 8 bytes for 32-bit machines) */
+ union {
+ uInt Base; /* literal, length base, or distance base */
+ inflate_huft *Next; /* pointer to next level of table */
+ } more;
+};
+
+#ifdef DEBUG_ZLIB
+ local uInt inflate_hufts;
+#endif
+
+local int inflate_trees_bits OF((
+ uIntf *, /* 19 code lengths */
+ uIntf *, /* bits tree desired/actual depth */
+ inflate_huft * FAR *, /* bits tree result */
+ z_stream *)); /* for zalloc, zfree functions */
+
+local int inflate_trees_dynamic OF((
+ uInt, /* number of literal/length codes */
+ uInt, /* number of distance codes */
+ uIntf *, /* that many (total) code lengths */
+ uIntf *, /* literal desired/actual bit depth */
+ uIntf *, /* distance desired/actual bit depth */
+ inflate_huft * FAR *, /* literal/length tree result */
+ inflate_huft * FAR *, /* distance tree result */
+ z_stream *)); /* for zalloc, zfree functions */
+
+local int inflate_trees_fixed OF((
+ uIntf *, /* literal desired/actual bit depth */
+ uIntf *, /* distance desired/actual bit depth */
+ inflate_huft * FAR *, /* literal/length tree result */
+ inflate_huft * FAR *)); /* distance tree result */
+
+local int inflate_trees_free OF((
+ inflate_huft *, /* tables to free */
+ z_stream *)); /* for zfree function */
+
+/*+++++*/
+/* infcodes.h -- header to use infcodes.c
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+struct inflate_codes_state;
+typedef struct inflate_codes_state FAR inflate_codes_statef;
+
+local inflate_codes_statef *inflate_codes_new OF((
+ uInt, uInt,
+ inflate_huft *, inflate_huft *,
+ z_stream *));
+
+local int inflate_codes OF((
+ inflate_blocks_statef *,
+ z_stream *,
+ int));
+
+local void inflate_codes_free OF((
+ inflate_codes_statef *,
+ z_stream *));
+
+/*+++++*/
+/* inflate.c -- zlib interface to inflate modules
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* inflate private state */
+struct internal_state {
+
+ /* mode */
+ enum {
+ METHOD, /* waiting for method byte */
+ FLAG, /* waiting for flag byte */
+ BLOCKS, /* decompressing blocks */
+ CHECK4, /* four check bytes to go */
+ CHECK3, /* three check bytes to go */
+ CHECK2, /* two check bytes to go */
+ CHECK1, /* one check byte to go */
+ DONE, /* finished check, done */
+ BAD} /* got an error--stay here */
+ mode; /* current inflate mode */
+
+ /* mode dependent information */
+ union {
+ uInt method; /* if FLAGS, method byte */
+ struct {
+ uLong was; /* computed check value */
+ uLong need; /* stream check value */
+ } check; /* if CHECK, check values to compare */
+ uInt marker; /* if BAD, inflateSync's marker bytes count */
+ } sub; /* submode */
+
+ /* mode independent information */
+ int nowrap; /* flag for no wrapper */
+ uInt wbits; /* log2(window size) (8..15, defaults to 15) */
+ inflate_blocks_statef
+ *blocks; /* current inflate_blocks state */
+
+};
+
+int inflateReset(z)
+z_stream *z;
+{
+ uLong c;
+
+ if (z == Z_NULL || z->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ z->total_in = z->total_out = 0;
+ z->msg = Z_NULL;
+ z->state->mode = z->state->nowrap ? BLOCKS : METHOD;
+ inflate_blocks_reset(z->state->blocks, z, &c);
+ Trace("inflate: reset\n");
+ return Z_OK;
+}
+
+int inflateEnd(z)
+z_stream *z;
+{
+ uLong c;
+
+ if (z == Z_NULL || z->state == Z_NULL || z->zfree == Z_NULL)
+ return Z_STREAM_ERROR;
+ if (z->state->blocks != Z_NULL)
+ inflate_blocks_free(z->state->blocks, z, &c);
+ ZFREE(z, z->state, sizeof(struct internal_state));
+ z->state = Z_NULL;
+ Trace("inflate: end\n");
+ return Z_OK;
+}
+
+int inflateInit2(z, w)
+z_stream *z;
+int w;
+{
+ /* initialize state */
+ if (z == Z_NULL)
+ return Z_STREAM_ERROR;
+/* if (z->zalloc == Z_NULL) z->zalloc = zcalloc; */
+/* if (z->zfree == Z_NULL) z->zfree = zcfree; */
+ if ((z->state = (struct internal_state FAR *)
+ ZALLOC(z,1,sizeof(struct internal_state))) == Z_NULL)
+ return Z_MEM_ERROR;
+ z->state->blocks = Z_NULL;
+
+ /* handle undocumented nowrap option (no zlib header or check) */
+ z->state->nowrap = 0;
+ if (w < 0)
+ {
+ w = - w;
+ z->state->nowrap = 1;
+ }
+
+ /* set window size */
+ if (w < 8 || w > 15)
+ {
+ inflateEnd(z);
+ return Z_STREAM_ERROR;
+ }
+ z->state->wbits = (uInt)w;
+
+ /* create inflate_blocks state */
+ if ((z->state->blocks =
+ inflate_blocks_new(z, z->state->nowrap ? Z_NULL : adler32, 1 << w))
+ == Z_NULL)
+ {
+ inflateEnd(z);
+ return Z_MEM_ERROR;
+ }
+ Trace("inflate: allocated\n");
+
+ /* reset state */
+ inflateReset(z);
+ return Z_OK;
+}
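+
+/* Note: a negative windowBits selects the undocumented "nowrap"
+ * mode handled above, i.e. raw deflate data with no zlib header
+ * and no adler32 trailer, e.g. inflateInit2(&strm, -MAX_WBITS).
+ */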
+
+int inflateInit(z)
+z_stream *z;
+{
+ return inflateInit2(z, DEF_WBITS);
+}
+
+#define NEEDBYTE {if(z->avail_in==0)goto empty;r=Z_OK;}
+#define NEXTBYTE (z->avail_in--,z->total_in++,*z->next_in++)
+
+int inflate(z, f)
+z_stream *z;
+int f;
+{
+ int r;
+ uInt b;
+
+ if (z == Z_NULL || z->next_in == Z_NULL)
+ return Z_STREAM_ERROR;
+ r = Z_BUF_ERROR;
+ while (1) switch (z->state->mode)
+ {
+ case METHOD:
+ NEEDBYTE
+ if (((z->state->sub.method = NEXTBYTE) & 0xf) != DEFLATED)
+ {
+ z->state->mode = BAD;
+ z->msg = "unknown compression method";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ if ((z->state->sub.method >> 4) + 8 > z->state->wbits)
+ {
+ z->state->mode = BAD;
+ z->msg = "invalid window size";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ z->state->mode = FLAG;
+ case FLAG:
+ NEEDBYTE
+ if ((b = NEXTBYTE) & 0x20)
+ {
+ z->state->mode = BAD;
+ z->msg = "invalid reserved bit";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ if (((z->state->sub.method << 8) + b) % 31)
+ {
+ z->state->mode = BAD;
+ z->msg = "incorrect header check";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ Trace("inflate: zlib header ok\n");
+ z->state->mode = BLOCKS;
+ case BLOCKS:
+ r = inflate_blocks(z->state->blocks, z, r);
+ if (f == Z_PACKET_FLUSH && z->avail_in == 0 && z->avail_out != 0)
+ r = inflate_packet_flush(z->state->blocks);
+ if (r == Z_DATA_ERROR)
+ {
+ z->state->mode = BAD;
+ z->state->sub.marker = 0; /* can try inflateSync */
+ break;
+ }
+ if (r != Z_STREAM_END)
+ return r;
+ r = Z_OK;
+ inflate_blocks_reset(z->state->blocks, z, &z->state->sub.check.was);
+ if (z->state->nowrap)
+ {
+ z->state->mode = DONE;
+ break;
+ }
+ z->state->mode = CHECK4;
+ case CHECK4:
+ NEEDBYTE
+ z->state->sub.check.need = (uLong)NEXTBYTE << 24;
+ z->state->mode = CHECK3;
+ case CHECK3:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 16;
+ z->state->mode = CHECK2;
+ case CHECK2:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE << 8;
+ z->state->mode = CHECK1;
+ case CHECK1:
+ NEEDBYTE
+ z->state->sub.check.need += (uLong)NEXTBYTE;
+
+ if (z->state->sub.check.was != z->state->sub.check.need)
+ {
+ z->state->mode = BAD;
+ z->msg = "incorrect data check";
+ z->state->sub.marker = 5; /* can't try inflateSync */
+ break;
+ }
+ Trace( "inflate: zlib check ok\n");
+ z->state->mode = DONE;
+ case DONE:
+ return Z_STREAM_END;
+ case BAD:
+ return Z_DATA_ERROR;
+ default:
+ return Z_STREAM_ERROR;
+ }
+
+ empty:
+ if (f != Z_PACKET_FLUSH)
+ return r;
+ z->state->mode = BAD;
+ z->state->sub.marker = 0; /* can try inflateSync */
+ return Z_DATA_ERROR;
+}
+
+/*
+ * This subroutine adds the data at next_in/avail_in to the output history
+ * without performing any output. The output buffer must be "caught up";
+ * i.e. no pending output (hence s->read equals s->write), and the state must
+ * be BLOCKS (i.e. we should be willing to see the start of a series of
+ * BLOCKS). On exit, the output will also be caught up, and the checksum
+ * will have been updated if need be.
+ */
+
+int inflateIncomp(z)
+z_stream *z;
+{
+ if (z->state->mode != BLOCKS)
+ return Z_DATA_ERROR;
+ return inflate_addhistory(z->state->blocks, z);
+}
+
+int inflateSync(z)
+z_stream *z;
+{
+ uInt n; /* number of bytes to look at */
+ Bytef *p; /* pointer to bytes */
+ uInt m; /* number of marker bytes found in a row */
+ uLong r, w; /* temporaries to save total_in and total_out */
+
+ /* set up */
+ if (z == Z_NULL || z->state == Z_NULL)
+ return Z_STREAM_ERROR;
+ if (z->state->mode != BAD)
+ {
+ z->state->mode = BAD;
+ z->state->sub.marker = 0;
+ }
+ if ((n = z->avail_in) == 0)
+ return Z_BUF_ERROR;
+ p = z->next_in;
+ m = z->state->sub.marker;
+
+  /* search for the 00 00 FF FF sync marker (an empty stored block) */
+ while (n && m < 4)
+ {
+ if (*p == (Byte)(m < 2 ? 0 : 0xff))
+ m++;
+ else if (*p)
+ m = 0;
+ else
+ m = 4 - m;
+ p++, n--;
+ }
+
+ /* restore */
+ z->total_in += p - z->next_in;
+ z->next_in = p;
+ z->avail_in = n;
+ z->state->sub.marker = m;
+
+ /* return no joy or set up to restart on a new block */
+ if (m != 4)
+ return Z_DATA_ERROR;
+ r = z->total_in; w = z->total_out;
+ inflateReset(z);
+ z->total_in = r; z->total_out = w;
+ z->state->mode = BLOCKS;
+ return Z_OK;
+}
+
+#undef NEEDBYTE
+#undef NEXTBYTE
+
+/*+++++*/
+/* infutil.h -- types and macros common to blocks and codes
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+/* inflate blocks semi-private state */
+struct inflate_blocks_state {
+
+ /* mode */
+ enum {
+ TYPE, /* get type bits (3, including end bit) */
+ LENS, /* get lengths for stored */
+ STORED, /* processing stored block */
+ TABLE, /* get table lengths */
+ BTREE, /* get bit lengths tree for a dynamic block */
+ DTREE, /* get length, distance trees for a dynamic block */
+ CODES, /* processing fixed or dynamic block */
+ DRY, /* output remaining window bytes */
+ DONEB, /* finished last block, done */
+ BADB} /* got a data error--stuck here */
+ mode; /* current inflate_block mode */
+
+ /* mode dependent information */
+ union {
+ uInt left; /* if STORED, bytes left to copy */
+ struct {
+ uInt table; /* table lengths (14 bits) */
+ uInt index; /* index into blens (or border) */
+ uIntf *blens; /* bit lengths of codes */
+ uInt bb; /* bit length tree depth */
+ inflate_huft *tb; /* bit length decoding tree */
+ int nblens; /* # elements allocated at blens */
+ } trees; /* if DTREE, decoding info for trees */
+ struct {
+ inflate_huft *tl, *td; /* trees to free */
+ inflate_codes_statef
+ *codes;
+ } decode; /* if CODES, current state */
+ } sub; /* submode */
+ uInt last; /* true if this block is the last block */
+
+ /* mode independent information */
+ uInt bitk; /* bits in bit buffer */
+ uLong bitb; /* bit buffer */
+ Bytef *window; /* sliding window */
+ Bytef *end; /* one byte after sliding window */
+ Bytef *read; /* window read pointer */
+ Bytef *write; /* window write pointer */
+ check_func checkfn; /* check function */
+ uLong check; /* check on output */
+
+};
+
+/* defines for inflate input/output */
+/* update pointers and return */
+#define UPDBITS {s->bitb=b;s->bitk=k;}
+#define UPDIN {z->avail_in=n;z->total_in+=p-z->next_in;z->next_in=p;}
+#define UPDOUT {s->write=q;}
+#define UPDATE {UPDBITS UPDIN UPDOUT}
+#define LEAVE {UPDATE return inflate_flush(s,z,r);}
+/* get bytes and bits */
+#define LOADIN {p=z->next_in;n=z->avail_in;b=s->bitb;k=s->bitk;}
+#define NEEDBYTE {if(n)r=Z_OK;else LEAVE}
+#define NEXTBYTE (n--,*p++)
+#define NEEDBITS(j) {while(k<(j)){NEEDBYTE;b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define DUMPBITS(j) {b>>=(j);k-=(j);}
+/* output bytes */
+#define WAVAIL (q<s->read?s->read-q-1:s->end-q)
+#define LOADOUT {q=s->write;m=WAVAIL;}
+#define WRAP {if(q==s->end&&s->read!=s->window){q=s->window;m=WAVAIL;}}
+#define FLUSH {UPDOUT r=inflate_flush(s,z,r); LOADOUT}
+#define NEEDOUT {if(m==0){WRAP if(m==0){FLUSH WRAP if(m==0) LEAVE}}r=Z_OK;}
+#define OUTBYTE(a) {*q++=(Byte)(a);m--;}
+/* load local pointers */
+#define LOAD {LOADIN LOADOUT}
+
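+/* Worked example, using the macros above exactly as the TYPE case of
+   inflate_blocks() below does (assuming the locals p, n, b, k set up
+   by LOAD):
+
+       NEEDBITS(3)               make sure b/k hold at least 3 bits
+       t = (uInt)b & 7;          peek at the low 3 bits
+       DUMPBITS(3)               consume them: b >>= 3, k -= 3
+
+   NEEDBYTE (and therefore NEEDBITS) bails out through LEAVE when the
+   input is exhausted, which is what lets the inflate state machines
+   suspend and resume between calls. */
+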
+/* And'ing with mask[n] masks the lower n bits */
+local uInt inflate_mask[] = {
+ 0x0000,
+ 0x0001, 0x0003, 0x0007, 0x000f, 0x001f, 0x003f, 0x007f, 0x00ff,
+ 0x01ff, 0x03ff, 0x07ff, 0x0fff, 0x1fff, 0x3fff, 0x7fff, 0xffff
+};
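+/* e.g. inflate_mask[3] == 0x0007, so ((uInt)b & inflate_mask[3]) yields
+   the low three bits of the bit buffer */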
+
+/* copy as much as possible from the sliding window to the output area */
+local int inflate_flush OF((
+ inflate_blocks_statef *,
+ z_stream *,
+ int));
+
+/*+++++*/
+/* inffast.h -- header to use inffast.c
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* WARNING: this file should *not* be used by applications. It is
+ part of the implementation of the compression library and is
+ subject to change. Applications should only use zlib.h.
+ */
+
+local int inflate_fast OF((
+ uInt,
+ uInt,
+ inflate_huft *,
+ inflate_huft *,
+ inflate_blocks_statef *,
+ z_stream *));
+
+/*+++++*/
+/* infblock.c -- interpret and process block types to last block
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* Table for deflate from PKZIP's appnote.txt. */
+local uInt border[] = { /* Order of the bit length code lengths */
+ 16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15};
+
+/*
+ Notes beyond the 1.93a appnote.txt:
+
+ 1. Distance pointers never point before the beginning of the output
+ stream.
+ 2. Distance pointers can point back across blocks, up to 32k away.
+ 3. There is an implied maximum of 7 bits for the bit length table and
+ 15 bits for the actual data.
+ 4. If only one code exists, then it is encoded using one bit. (Zero
+ would be more efficient, but perhaps a little confusing.) If two
+ codes exist, they are coded using one bit each (0 and 1).
+ 5. There is no way of sending zero distance codes--a dummy must be
+ sent if there are none. (History: a pre 2.0 version of PKZIP would
+ store blocks with no distance codes, but this was discovered to be
+ too harsh a criterion.) Valid only for 1.93a. 2.04c does allow
+      zero distance codes, which are sent as one code of zero bits in
+ length.
+ 6. There are up to 286 literal/length codes. Code 256 represents the
+ end-of-block. Note however that the static length tree defines
+ 288 codes just to fill out the Huffman codes. Codes 286 and 287
+ cannot be used though, since there is no length base or extra bits
+      defined for them. Similarly, there are up to 30 distance codes.
+ However, static trees define 32 codes (all 5 bits) to fill out the
+ Huffman codes, but the last two had better not show up in the data.
+ 7. Unzip can check dynamic Huffman blocks for complete code sets.
+ The exception is that a single code would not be complete (see #4).
+  8. The five bits following the block type are really the number of
+ literal codes sent minus 257.
+ 9. Length codes 8,16,16 are interpreted as 13 length codes of 8 bits
+ (1+6+6). Therefore, to output three times the length, you output
+ three codes (1+1+1), whereas to output four times the same length,
+ you only need two codes (1+3). Hmm.
+ 10. In the tree reconstruction algorithm, Code = Code + Increment
+ only if BitLength(i) is not zero. (Pretty obvious.)
+ 11. Correction: 4 Bits: # of Bit Length codes - 4 (4 - 19)
+ 12. Note: length code 284 can represent 227-258, but length code 285
+ really is 258. The last length deserves its own, short code
+ since it gets used a lot in very redundant files. The length
+ 258 is special since 258 - 3 (the min match length) is 255.
+ 13. The literal/length and distance code bit lengths are read as a
+ single stream of lengths. It is possible (and advantageous) for
+ a repeat code (16, 17, or 18) to go across the boundary between
+ the two sets of lengths.
+ */
+
+local void inflate_blocks_reset(s, z, c)
+inflate_blocks_statef *s;
+z_stream *z;
+uLongf *c;
+{
+ if (s->checkfn != Z_NULL)
+ *c = s->check;
+ if (s->mode == BTREE || s->mode == DTREE)
+ ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt));
+ if (s->mode == CODES)
+ {
+ inflate_codes_free(s->sub.decode.codes, z);
+ inflate_trees_free(s->sub.decode.td, z);
+ inflate_trees_free(s->sub.decode.tl, z);
+ }
+ s->mode = TYPE;
+ s->bitk = 0;
+ s->bitb = 0;
+ s->read = s->write = s->window;
+ if (s->checkfn != Z_NULL)
+ s->check = (*s->checkfn)(0L, Z_NULL, 0);
+  Trace(("inflate: blocks reset\n"));
+}
+
+local inflate_blocks_statef *inflate_blocks_new(z, c, w)
+z_stream *z;
+check_func c;
+uInt w;
+{
+ inflate_blocks_statef *s;
+
+ if ((s = (inflate_blocks_statef *)ZALLOC
+ (z,1,sizeof(struct inflate_blocks_state))) == Z_NULL)
+ return s;
+ if ((s->window = (Bytef *)ZALLOC(z, 1, w)) == Z_NULL)
+ {
+ ZFREE(z, s, sizeof(struct inflate_blocks_state));
+ return Z_NULL;
+ }
+ s->end = s->window + w;
+ s->checkfn = c;
+ s->mode = TYPE;
+  Trace(("inflate: blocks allocated\n"));
+ inflate_blocks_reset(s, z, &s->check);
+ return s;
+}
+
+local int inflate_blocks(s, z, r)
+inflate_blocks_statef *s;
+z_stream *z;
+int r;
+{
+ uInt t; /* temporary storage */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+
+ /* copy input/output information to locals (UPDATE macro restores) */
+ LOAD
+
+ /* process input based on current state */
+ while (1) switch (s->mode)
+ {
+ case TYPE:
+ NEEDBITS(3)
+ t = (uInt)b & 7;
+ s->last = t & 1;
+ switch (t >> 1)
+ {
+ case 0: /* stored */
+ Trace(("inflate: stored block%s\n",
+ s->last ? " (last)" : ""));
+ DUMPBITS(3)
+ t = k & 7; /* go to byte boundary */
+ DUMPBITS(t)
+ s->mode = LENS; /* get length of stored block */
+ break;
+ case 1: /* fixed */
+ Trace(( "inflate: fixed codes block%s\n",
+ s->last ? " (last)" : ""));
+ {
+ uInt bl, bd;
+ inflate_huft *tl, *td;
+
+ inflate_trees_fixed(&bl, &bd, &tl, &td);
+ s->sub.decode.codes = inflate_codes_new(bl, bd, tl, td, z);
+ if (s->sub.decode.codes == Z_NULL)
+ {
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ s->sub.decode.tl = Z_NULL; /* don't try to free these */
+ s->sub.decode.td = Z_NULL;
+ }
+ DUMPBITS(3)
+ s->mode = CODES;
+ break;
+ case 2: /* dynamic */
+ Trace(( "inflate: dynamic codes block%s\n",
+ s->last ? " (last)" : ""));
+ DUMPBITS(3)
+ s->mode = TABLE;
+ break;
+ case 3: /* illegal */
+ DUMPBITS(3)
+ s->mode = BADB;
+ z->msg = "invalid block type";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ break;
+ case LENS:
+ NEEDBITS(32)
+ if (((~b) >> 16) != (b & 0xffff))
+ {
+ s->mode = BADB;
+ z->msg = "invalid stored block lengths";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ s->sub.left = (uInt)b & 0xffff;
+ b = k = 0; /* dump bits */
+ Tracev(( "inflate: stored length %u\n", s->sub.left));
+ s->mode = s->sub.left ? STORED : TYPE;
+ break;
+ case STORED:
+ if (n == 0)
+ LEAVE
+ NEEDOUT
+ t = s->sub.left;
+ if (t > n) t = n;
+ if (t > m) t = m;
+ zmemcpy(q, p, t);
+ p += t; n -= t;
+ q += t; m -= t;
+ if ((s->sub.left -= t) != 0)
+ break;
+ Tracev(( "inflate: stored end, %lu total out\n",
+ z->total_out + (q >= s->read ? q - s->read :
+ (s->end - s->read) + (q - s->window))));
+ s->mode = s->last ? DRY : TYPE;
+ break;
+ case TABLE:
+ NEEDBITS(14)
+ s->sub.trees.table = t = (uInt)b & 0x3fff;
+#ifndef PKZIP_BUG_WORKAROUND
+ if ((t & 0x1f) > 29 || ((t >> 5) & 0x1f) > 29)
+ {
+ s->mode = BADB;
+ z->msg = "too many length or distance symbols";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+#endif
+ t = 258 + (t & 0x1f) + ((t >> 5) & 0x1f);
+ if (t < 19)
+ t = 19;
+ if ((s->sub.trees.blens = (uIntf*)ZALLOC(z, t, sizeof(uInt))) == Z_NULL)
+ {
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ s->sub.trees.nblens = t;
+ DUMPBITS(14)
+ s->sub.trees.index = 0;
+ Tracev(( "inflate: table sizes ok\n"));
+ s->mode = BTREE;
+ case BTREE:
+ while (s->sub.trees.index < 4 + (s->sub.trees.table >> 10))
+ {
+ NEEDBITS(3)
+ s->sub.trees.blens[border[s->sub.trees.index++]] = (uInt)b & 7;
+ DUMPBITS(3)
+ }
+ while (s->sub.trees.index < 19)
+ s->sub.trees.blens[border[s->sub.trees.index++]] = 0;
+ s->sub.trees.bb = 7;
+ t = inflate_trees_bits(s->sub.trees.blens, &s->sub.trees.bb,
+ &s->sub.trees.tb, z);
+ if (t != Z_OK)
+ {
+ r = t;
+ if (r == Z_DATA_ERROR)
+ s->mode = BADB;
+ LEAVE
+ }
+ s->sub.trees.index = 0;
+ Tracev(( "inflate: bits tree ok\n"));
+ s->mode = DTREE;
+ case DTREE:
+ while (t = s->sub.trees.table,
+ s->sub.trees.index < 258 + (t & 0x1f) + ((t >> 5) & 0x1f))
+ {
+ inflate_huft *h;
+ uInt i, j, c;
+
+ t = s->sub.trees.bb;
+ NEEDBITS(t)
+ h = s->sub.trees.tb + ((uInt)b & inflate_mask[t]);
+ t = h->word.what.Bits;
+ c = h->more.Base;
+ if (c < 16)
+ {
+ DUMPBITS(t)
+ s->sub.trees.blens[s->sub.trees.index++] = c;
+ }
+ else /* c == 16..18 */
+ {
+ i = c == 18 ? 7 : c - 14;
+ j = c == 18 ? 11 : 3;
+ NEEDBITS(t + i)
+ DUMPBITS(t)
+ j += (uInt)b & inflate_mask[i];
+ DUMPBITS(i)
+ i = s->sub.trees.index;
+ t = s->sub.trees.table;
+ if (i + j > 258 + (t & 0x1f) + ((t >> 5) & 0x1f) ||
+ (c == 16 && i < 1))
+ {
+ s->mode = BADB;
+ z->msg = "invalid bit length repeat";
+ r = Z_DATA_ERROR;
+ LEAVE
+ }
+ c = c == 16 ? s->sub.trees.blens[i - 1] : 0;
+ do {
+ s->sub.trees.blens[i++] = c;
+ } while (--j);
+ s->sub.trees.index = i;
+ }
+ }
+ inflate_trees_free(s->sub.trees.tb, z);
+ s->sub.trees.tb = Z_NULL;
+ {
+ uInt bl, bd;
+ inflate_huft *tl, *td;
+ inflate_codes_statef *c;
+
+ bl = 9; /* must be <= 9 for lookahead assumptions */
+ bd = 6; /* must be <= 9 for lookahead assumptions */
+ t = s->sub.trees.table;
+ t = inflate_trees_dynamic(257 + (t & 0x1f), 1 + ((t >> 5) & 0x1f),
+ s->sub.trees.blens, &bl, &bd, &tl, &td, z);
+ if (t != Z_OK)
+ {
+ if (t == (uInt)Z_DATA_ERROR)
+ s->mode = BADB;
+ r = t;
+ LEAVE
+ }
+ Tracev(( "inflate: trees ok\n"));
+ if ((c = inflate_codes_new(bl, bd, tl, td, z)) == Z_NULL)
+ {
+ inflate_trees_free(td, z);
+ inflate_trees_free(tl, z);
+ r = Z_MEM_ERROR;
+ LEAVE
+ }
+ ZFREE(z, s->sub.trees.blens, s->sub.trees.nblens * sizeof(uInt));
+ s->sub.decode.codes = c;
+ s->sub.decode.tl = tl;
+ s->sub.decode.td = td;
+ }
+ s->mode = CODES;
+ case CODES:
+ UPDATE
+ if ((r = inflate_codes(s, z, r)) != Z_STREAM_END)
+ return inflate_flush(s, z, r);
+ r = Z_OK;
+ inflate_codes_free(s->sub.decode.codes, z);
+ inflate_trees_free(s->sub.decode.td, z);
+ inflate_trees_free(s->sub.decode.tl, z);
+ LOAD
+ Tracev(( "inflate: codes end, %lu total out\n",
+ z->total_out + (q >= s->read ? q - s->read :
+ (s->end - s->read) + (q - s->window))));
+ if (!s->last)
+ {
+ s->mode = TYPE;
+ break;
+ }
+ if (k > 7) /* return unused byte, if any */
+ {
+ Assert(k < 16, "inflate_codes grabbed too many bytes")
+ k -= 8;
+ n++;
+ p--; /* can always return one */
+ }
+ s->mode = DRY;
+ case DRY:
+ FLUSH
+ if (s->read != s->write)
+ LEAVE
+ s->mode = DONEB;
+ case DONEB:
+ r = Z_STREAM_END;
+ LEAVE
+ case BADB:
+ r = Z_DATA_ERROR;
+ LEAVE
+ default:
+ r = Z_STREAM_ERROR;
+ LEAVE
+ }
+}
+
+local int inflate_blocks_free(s, z, c)
+inflate_blocks_statef *s;
+z_stream *z;
+uLongf *c;
+{
+ inflate_blocks_reset(s, z, c);
+ ZFREE(z, s->window, s->end - s->window);
+ ZFREE(z, s, sizeof(struct inflate_blocks_state));
+ Trace(( "inflate: blocks freed\n"));
+ return Z_OK;
+}
+
+/*
+ * This subroutine adds the data at next_in/avail_in to the output history
+ * without performing any output. The output buffer must be "caught up";
+ * i.e. no pending output (hence s->read equals s->write), and the state must
+ * be BLOCKS (i.e. we should be willing to see the start of a series of
+ * BLOCKS). On exit, the output will also be caught up, and the checksum
+ * will have been updated if need be.
+ */
+local int inflate_addhistory(s, z)
+inflate_blocks_statef *s;
+z_stream *z;
+{
+ uLong b; /* bit buffer */ /* NOT USED HERE */
+ uInt k; /* bits in bit buffer */ /* NOT USED HERE */
+ uInt t; /* temporary storage */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+
+ if (s->read != s->write)
+ return Z_STREAM_ERROR;
+ if (s->mode != TYPE)
+ return Z_DATA_ERROR;
+
+ /* we're ready to rock */
+ LOAD
+ /* while there is input ready, copy to output buffer, moving
+ * pointers as needed.
+ */
+ while (n) {
+ t = n; /* how many to do */
+ /* is there room until end of buffer? */
+ if (t > m) t = m;
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ s->check = (*s->checkfn)(s->check, q, t);
+ zmemcpy(q, p, t);
+ q += t;
+ p += t;
+ n -= t;
+ z->total_out += t;
+ s->read = q; /* drag read pointer forward */
+/* WRAP */ /* expand WRAP macro by hand to handle s->read */
+ if (q == s->end) {
+ s->read = q = s->window;
+ m = WAVAIL;
+ }
+ }
+ UPDATE
+ return Z_OK;
+}
+
+/*
+ * At the end of a Deflate-compressed PPP packet, we expect to have seen
+ * a `stored' block type value but not the (zero) length bytes.
+ */
+local int inflate_packet_flush(s)
+ inflate_blocks_statef *s;
+{
+ if (s->mode != LENS)
+ return Z_DATA_ERROR;
+ s->mode = TYPE;
+ return Z_OK;
+}
+
+/*+++++*/
+/* inftrees.c -- generate Huffman trees for efficient decoding
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+local int huft_build OF((
+ uIntf *, /* code lengths in bits */
+ uInt, /* number of codes */
+ uInt, /* number of "simple" codes */
+ uIntf *, /* list of base values for non-simple codes */
+ uIntf *, /* list of extra bits for non-simple codes */
+ inflate_huft * FAR*,/* result: starting table */
+ uIntf *, /* maximum lookup bits (returns actual) */
+ z_stream *)); /* for zalloc function */
+
+local voidpf falloc OF((
+ voidpf, /* opaque pointer (not used) */
+ uInt, /* number of items */
+ uInt)); /* size of item */
+
+local void ffree OF((
+ voidpf q, /* opaque pointer (not used) */
+ voidpf p, /* what to free (not used) */
+ uInt n)); /* number of bytes (not used) */
+
+/* Tables for deflate from PKZIP's appnote.txt. */
+local uInt cplens[] = { /* Copy lengths for literal codes 257..285 */
+ 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 15, 17, 19, 23, 27, 31,
+ 35, 43, 51, 59, 67, 83, 99, 115, 131, 163, 195, 227, 258, 0, 0};
+ /* actually lengths - 2; also see note #13 above about 258 */
+local uInt cplext[] = { /* Extra bits for literal codes 257..285 */
+ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2,
+ 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 5, 5, 0, 192, 192}; /* 192==invalid */
+local uInt cpdist[] = { /* Copy offsets for distance codes 0..29 */
+ 1, 2, 3, 4, 5, 7, 9, 13, 17, 25, 33, 49, 65, 97, 129, 193,
+ 257, 385, 513, 769, 1025, 1537, 2049, 3073, 4097, 6145,
+ 8193, 12289, 16385, 24577};
+local uInt cpdext[] = { /* Extra bits for distance codes */
+ 0, 0, 0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+ 7, 7, 8, 8, 9, 9, 10, 10, 11, 11,
+ 12, 12, 13, 13};
+
+/*
+ Huffman code decoding is performed using a multi-level table lookup.
+ The fastest way to decode is to simply build a lookup table whose
+ size is determined by the longest code. However, the time it takes
+ to build this table can also be a factor if the data being decoded
+ is not very long. The most common codes are necessarily the
+ shortest codes, so those codes dominate the decoding time, and hence
+ the speed. The idea is you can have a shorter table that decodes the
+ shorter, more probable codes, and then point to subsidiary tables for
+ the longer codes. The time it costs to decode the longer codes is
+ then traded against the time it takes to make longer tables.
+
+   The results of this trade-off are in the variables lbits and dbits
+ below. lbits is the number of bits the first level table for literal/
+ length codes can decode in one step, and dbits is the same thing for
+ the distance codes. Subsequent tables are also less than or equal to
+ those sizes. These values may be adjusted either when all of the
+ codes are shorter than that, in which case the longest code length in
+ bits is used, or when the shortest code is *longer* than the requested
+ table size, in which case the length of the shortest code in bits is
+ used.
+
+ There are two different values for the two tables, since they code a
+ different number of possibilities each. The literal/length table
+ codes 286 possible values, or in a flat code, a little over eight
+ bits. The distance table codes 30 possible values, or a little less
+ than five bits, flat. The optimum values for speed end up being
+ about one bit more than those, so lbits is 8+1 and dbits is 5+1.
+ The optimum values may differ though from machine to machine, and
+ possibly even between compilers. Your mileage may vary.
+ */
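+
+/* A sketch of the resulting decode step, as performed by inflate_codes()
+   and inflate_fast() below: index the first-level table with the low
+   lbits bits of the bit buffer; a nonzero entry whose exop has bits 4
+   (length/distance base) and 6 (invalid or end-of-block) clear names a
+   second-level table at t->next, indexed by exop further bits:
+
+       t = tl + ((uInt)b & inflate_mask[lbits]);
+       DUMPBITS(t->bits)
+       e = t->exop;
+       if (e != 0 && (e & (16 + 64)) == 0)       second-level lookup
+           t = t->next + ((uInt)b & inflate_mask[e]);
+
+   so the short, frequent codes resolve in a single masked lookup. */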
+
+/* If BMAX needs to be larger than 16, then h and x[] should be uLong. */
+#define BMAX 15 /* maximum bit length of any code */
+#define N_MAX 288 /* maximum number of codes in any set */
+
+#ifdef DEBUG_ZLIB
+ uInt inflate_hufts;
+#endif
+
+local int huft_build(b, n, s, d, e, t, m, zs)
+uIntf *b; /* code lengths in bits (all assumed <= BMAX) */
+uInt n; /* number of codes (assumed <= N_MAX) */
+uInt s; /* number of simple-valued codes (0..s-1) */
+uIntf *d; /* list of base values for non-simple codes */
+uIntf *e; /* list of extra bits for non-simple codes */
+inflate_huft * FAR *t; /* result: starting table */
+uIntf *m; /* maximum lookup bits, returns actual */
+z_stream *zs; /* for zalloc function */
+/* Given a list of code lengths and a maximum table size, make a set of
+ tables to decode that set of codes. Return Z_OK on success, Z_BUF_ERROR
+ if the given code set is incomplete (the tables are still built in this
+ case), Z_DATA_ERROR if the input is invalid (all zero length codes or an
+ over-subscribed set of lengths), or Z_MEM_ERROR if not enough memory. */
+{
+
+ uInt a; /* counter for codes of length k */
+ uInt c[BMAX+1]; /* bit length count table */
+ uInt f; /* i repeats in table every f entries */
+ int g; /* maximum code length */
+ int h; /* table level */
+ register uInt i; /* counter, current code */
+ register uInt j; /* counter */
+ register int k; /* number of bits in current code */
+ int l; /* bits per table (returned in m) */
+ register uIntf *p; /* pointer into c[], b[], or v[] */
+ inflate_huft *q; /* points to current table */
+ struct inflate_huft_s r; /* table entry for structure assignment */
+ inflate_huft *u[BMAX]; /* table stack */
+ uInt v[N_MAX]; /* values in order of bit length */
+ register int w; /* bits before this table == (l * h) */
+ uInt x[BMAX+1]; /* bit offsets, then code stack */
+ uIntf *xp; /* pointer into x */
+ int y; /* number of dummy codes added */
+ uInt z; /* number of entries in current table */
+
+ /* Generate counts for each bit length */
+ p = c;
+#define C0 *p++ = 0;
+#define C2 C0 C0 C0 C0
+#define C4 C2 C2 C2 C2
+ C4 /* clear c[]--assume BMAX+1 is 16 */
+ p = b; i = n;
+ do {
+ c[*p++]++; /* assume all entries <= BMAX */
+ } while (--i);
+ if (c[0] == n) /* null input--all zero length codes */
+ {
+ *t = (inflate_huft *)Z_NULL;
+ *m = 0;
+ return Z_OK;
+ }
+
+ /* Find minimum and maximum length, bound *m by those */
+ l = *m;
+ for (j = 1; j <= BMAX; j++)
+ if (c[j])
+ break;
+ k = j; /* minimum code length */
+ if ((uInt)l < j)
+ l = j;
+ for (i = BMAX; i; i--)
+ if (c[i])
+ break;
+ g = i; /* maximum code length */
+ if ((uInt)l > i)
+ l = i;
+ *m = l;
+
+ /* Adjust last length count to fill out codes, if needed */
+ for (y = 1 << j; j < i; j++, y <<= 1)
+ if ((y -= c[j]) < 0)
+ return Z_DATA_ERROR;
+ if ((y -= c[i]) < 0)
+ return Z_DATA_ERROR;
+ c[i] += y;
+
+ /* Generate starting offsets into the value table for each length */
+ x[1] = j = 0;
+ p = c + 1; xp = x + 2;
+ while (--i) { /* note that i == g from above */
+ *xp++ = (j += *p++);
+ }
+
+ /* Make a table of values in order of bit lengths */
+ p = b; i = 0;
+ do {
+ if ((j = *p++) != 0)
+ v[x[j]++] = i;
+ } while (++i < n);
+
+ /* Generate the Huffman codes and for each, make the table entries */
+ x[0] = i = 0; /* first Huffman code is zero */
+ p = v; /* grab values in bit order */
+ h = -1; /* no tables yet--level -1 */
+ w = -l; /* bits decoded == (l * h) */
+ u[0] = (inflate_huft *)Z_NULL; /* just to keep compilers happy */
+ q = (inflate_huft *)Z_NULL; /* ditto */
+ z = 0; /* ditto */
+
+ /* go through the bit lengths (k already is bits in shortest code) */
+ for (; k <= g; k++)
+ {
+ a = c[k];
+ while (a--)
+ {
+ /* here i is the Huffman code of length k bits for value *p */
+ /* make tables up to required level */
+ while (k > w + l)
+ {
+ h++;
+ w += l; /* previous table always l bits */
+
+ /* compute minimum size table less than or equal to l bits */
+ z = (z = g - w) > (uInt)l ? l : z; /* table size upper limit */
+ if ((f = 1 << (j = k - w)) > a + 1) /* try a k-w bit table */
+ { /* too few codes for k-w bit table */
+ f -= a + 1; /* deduct codes from patterns left */
+ xp = c + k;
+ if (j < z)
+ while (++j < z) /* try smaller tables up to z bits */
+ {
+ if ((f <<= 1) <= *++xp)
+ break; /* enough codes to use up j bits */
+ f -= *xp; /* else deduct codes from patterns */
+ }
+ }
+ z = 1 << j; /* table entries for j-bit table */
+
+ /* allocate and link in new table */
+ if ((q = (inflate_huft *)ZALLOC
+ (zs,z + 1,sizeof(inflate_huft))) == Z_NULL)
+ {
+ if (h)
+ inflate_trees_free(u[0], zs);
+ return Z_MEM_ERROR; /* not enough memory */
+ }
+ q->word.Nalloc = z + 1;
+#ifdef DEBUG_ZLIB
+ inflate_hufts += z + 1;
+#endif
+ *t = q + 1; /* link to list for huft_free() */
+ *(t = &(q->next)) = Z_NULL;
+ u[h] = ++q; /* table starts after link */
+
+ /* connect to last table, if there is one */
+ if (h)
+ {
+ x[h] = i; /* save pattern for backing up */
+ r.bits = (Byte)l; /* bits to dump before this table */
+ r.exop = (Byte)j; /* bits in this table */
+ r.next = q; /* pointer to this table */
+ j = i >> (w - l); /* (get around Turbo C bug) */
+ u[h-1][j] = r; /* connect to last table */
+ }
+ }
+
+ /* set up table entry in r */
+ r.bits = (Byte)(k - w);
+ if (p >= v + n)
+ r.exop = 128 + 64; /* out of values--invalid code */
+ else if (*p < s)
+ {
+ r.exop = (Byte)(*p < 256 ? 0 : 32 + 64); /* 256 is end-of-block */
+ r.base = *p++; /* simple code is just the value */
+ }
+ else
+ {
+ r.exop = (Byte)e[*p - s] + 16 + 64; /* non-simple--look up in lists */
+ r.base = d[*p++ - s];
+ }
+
+ /* fill code-like entries with r */
+ f = 1 << (k - w);
+ for (j = i >> w; j < z; j += f)
+ q[j] = r;
+
+ /* backwards increment the k-bit code i */
+ for (j = 1 << (k - 1); i & j; j >>= 1)
+ i ^= j;
+ i ^= j;
+
+ /* backup over finished tables */
+ while ((i & ((1 << w) - 1)) != x[h])
+ {
+ h--; /* don't need to update q */
+ w -= l;
+ }
+ }
+ }
+
+ /* Return Z_BUF_ERROR if we were given an incomplete table */
+ return y != 0 && g != 1 ? Z_BUF_ERROR : Z_OK;
+}
+
+local int inflate_trees_bits(c, bb, tb, z)
+uIntf *c; /* 19 code lengths */
+uIntf *bb; /* bits tree desired/actual depth */
+inflate_huft * FAR *tb; /* bits tree result */
+z_stream *z; /* for zfree function */
+{
+ int r;
+
+ r = huft_build(c, 19, 19, (uIntf*)Z_NULL, (uIntf*)Z_NULL, tb, bb, z);
+ if (r == Z_DATA_ERROR)
+ z->msg = "oversubscribed dynamic bit lengths tree";
+ else if (r == Z_BUF_ERROR)
+ {
+ inflate_trees_free(*tb, z);
+ z->msg = "incomplete dynamic bit lengths tree";
+ r = Z_DATA_ERROR;
+ }
+ return r;
+}
+
+local int inflate_trees_dynamic(nl, nd, c, bl, bd, tl, td, z)
+uInt nl; /* number of literal/length codes */
+uInt nd; /* number of distance codes */
+uIntf *c; /* that many (total) code lengths */
+uIntf *bl; /* literal desired/actual bit depth */
+uIntf *bd; /* distance desired/actual bit depth */
+inflate_huft * FAR *tl; /* literal/length tree result */
+inflate_huft * FAR *td; /* distance tree result */
+z_stream *z; /* for zfree function */
+{
+ int r;
+
+ /* build literal/length tree */
+ if ((r = huft_build(c, nl, 257, cplens, cplext, tl, bl, z)) != Z_OK)
+ {
+ if (r == Z_DATA_ERROR)
+ z->msg = "oversubscribed literal/length tree";
+ else if (r == Z_BUF_ERROR)
+ {
+ inflate_trees_free(*tl, z);
+ z->msg = "incomplete literal/length tree";
+ r = Z_DATA_ERROR;
+ }
+ return r;
+ }
+
+ /* build distance tree */
+ if ((r = huft_build(c + nl, nd, 0, cpdist, cpdext, td, bd, z)) != Z_OK)
+ {
+ if (r == Z_DATA_ERROR)
+      z->msg = "oversubscribed distance tree";
+ else if (r == Z_BUF_ERROR) {
+#ifdef PKZIP_BUG_WORKAROUND
+ r = Z_OK;
+ }
+#else
+ inflate_trees_free(*td, z);
+      z->msg = "incomplete distance tree";
+ r = Z_DATA_ERROR;
+ }
+ inflate_trees_free(*tl, z);
+ return r;
+#endif
+ }
+
+ /* done */
+ return Z_OK;
+}
+
+/* build fixed tables only once--keep them here */
+local int fixed_lock = 0;
+local int fixed_built = 0;
+#define FIXEDH 530 /* number of hufts used by fixed tables */
+local uInt fixed_left = FIXEDH;
+local inflate_huft fixed_mem[FIXEDH];
+local uInt fixed_bl;
+local uInt fixed_bd;
+local inflate_huft *fixed_tl;
+local inflate_huft *fixed_td;
+
+local voidpf falloc(q, n, s)
+voidpf q; /* opaque pointer (not used) */
+uInt n; /* number of items */
+uInt s; /* size of item */
+{
+ Assert(s == sizeof(inflate_huft) && n <= fixed_left,
+ "inflate_trees falloc overflow");
+ if (q) s++; /* to make some compilers happy */
+ fixed_left -= n;
+ return (voidpf)(fixed_mem + fixed_left);
+}
+
+local void ffree(q, p, n)
+voidpf q;
+voidpf p;
+uInt n;
+{
+ Assert(0, "inflate_trees ffree called!");
+ if (q) q = p; /* to make some compilers happy */
+}
+
+local int inflate_trees_fixed(bl, bd, tl, td)
+uIntf *bl; /* literal desired/actual bit depth */
+uIntf *bd; /* distance desired/actual bit depth */
+inflate_huft * FAR *tl; /* literal/length tree result */
+inflate_huft * FAR *td; /* distance tree result */
+{
+ /* build fixed tables if not built already--lock out other instances */
+ while (++fixed_lock > 1)
+ fixed_lock--;
+ if (!fixed_built)
+ {
+ int k; /* temporary variable */
+ unsigned c[288]; /* length list for huft_build */
+ z_stream z; /* for falloc function */
+
+ /* set up fake z_stream for memory routines */
+ z.zalloc = falloc;
+ z.zfree = ffree;
+ z.opaque = Z_NULL;
+
+ /* literal table */
+ for (k = 0; k < 144; k++)
+ c[k] = 8;
+ for (; k < 256; k++)
+ c[k] = 9;
+ for (; k < 280; k++)
+ c[k] = 7;
+ for (; k < 288; k++)
+ c[k] = 8;
+ fixed_bl = 7;
+ huft_build(c, 288, 257, cplens, cplext, &fixed_tl, &fixed_bl, &z);
+
+ /* distance table */
+ for (k = 0; k < 30; k++)
+ c[k] = 5;
+ fixed_bd = 5;
+ huft_build(c, 30, 0, cpdist, cpdext, &fixed_td, &fixed_bd, &z);
+
+ /* done */
+ fixed_built = 1;
+ }
+ fixed_lock--;
+ *bl = fixed_bl;
+ *bd = fixed_bd;
+ *tl = fixed_tl;
+ *td = fixed_td;
+ return Z_OK;
+}
+
+local int inflate_trees_free(t, z)
+inflate_huft *t; /* table to free */
+z_stream *z; /* for zfree function */
+/* Free the malloc'ed tables built by huft_build(), which makes a linked
+ list of the tables it made, with the links in a dummy first entry of
+ each table. */
+{
+ register inflate_huft *p, *q;
+
+ /* Go through linked list, freeing from the malloced (t[-1]) address. */
+ p = t;
+ while (p != Z_NULL)
+ {
+ q = (--p)->next;
+ ZFREE(z, p, p->word.Nalloc * sizeof(inflate_huft));
+ p = q;
+ }
+ return Z_OK;
+}
+
+/*+++++*/
+/* infcodes.c -- process literals and length/distance pairs
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* inflate codes private state */
+struct inflate_codes_state {
+
+ /* mode */
+ enum { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+ START, /* x: set up for LEN */
+ LEN, /* i: get length/literal/eob next */
+ LENEXT, /* i: getting length extra (have base) */
+ DIST, /* i: get distance next */
+ DISTEXT, /* i: getting distance extra */
+ COPY, /* o: copying bytes in window, waiting for space */
+ LIT, /* o: got literal, waiting for output space */
+ WASH, /* o: got eob, possibly still output waiting */
+ END, /* x: got eob and all data flushed */
+ BADCODE} /* x: got error */
+ mode; /* current inflate_codes mode */
+
+ /* mode dependent information */
+ uInt len;
+ union {
+ struct {
+ inflate_huft *tree; /* pointer into tree */
+ uInt need; /* bits needed */
+ } code; /* if LEN or DIST, where in tree */
+ uInt lit; /* if LIT, literal */
+ struct {
+ uInt get; /* bits to get for extra */
+ uInt dist; /* distance back to copy from */
+ } copy; /* if EXT or COPY, where and how much */
+ } sub; /* submode */
+
+ /* mode independent information */
+ Byte lbits; /* ltree bits decoded per branch */
+  Byte dbits;           /* dtree bits decoded per branch */
+ inflate_huft *ltree; /* literal/length/eob tree */
+ inflate_huft *dtree; /* distance tree */
+
+};
+
+local inflate_codes_statef *inflate_codes_new(bl, bd, tl, td, z)
+uInt bl, bd;
+inflate_huft *tl, *td;
+z_stream *z;
+{
+ inflate_codes_statef *c;
+
+ if ((c = (inflate_codes_statef *)
+ ZALLOC(z,1,sizeof(struct inflate_codes_state))) != Z_NULL)
+ {
+ c->mode = START;
+ c->lbits = (Byte)bl;
+ c->dbits = (Byte)bd;
+ c->ltree = tl;
+ c->dtree = td;
+ Tracev(( "inflate: codes new\n"));
+ }
+ return c;
+}
+
+local int inflate_codes(s, z, r)
+inflate_blocks_statef *s;
+z_stream *z;
+int r;
+{
+ uInt j; /* temporary storage */
+ inflate_huft *t; /* temporary pointer */
+ uInt e; /* extra bits or operation */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+ Bytef *f; /* pointer to copy strings from */
+ inflate_codes_statef *c = s->sub.decode.codes; /* codes state */
+
+ /* copy input/output information to locals (UPDATE macro restores) */
+ LOAD
+
+ /* process input and output based on current state */
+ while (1) switch (c->mode)
+ { /* waiting for "i:"=input, "o:"=output, "x:"=nothing */
+ case START: /* x: set up for LEN */
+#ifndef SLOW
+ if (m >= 258 && n >= 10)
+ {
+ UPDATE
+ r = inflate_fast(c->lbits, c->dbits, c->ltree, c->dtree, s, z);
+ LOAD
+ if (r != Z_OK)
+ {
+ c->mode = r == Z_STREAM_END ? WASH : BADCODE;
+ break;
+ }
+ }
+#endif /* !SLOW */
+ c->sub.code.need = c->lbits;
+ c->sub.code.tree = c->ltree;
+ c->mode = LEN;
+ case LEN: /* i: get length/literal/eob next */
+ j = c->sub.code.need;
+ NEEDBITS(j)
+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+ DUMPBITS(t->bits)
+ e = (uInt)(t->exop);
+ if (e == 0) /* literal */
+ {
+ c->sub.lit = t->base;
+ Tracevv(( t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: literal '%c'\n" :
+ "inflate: literal 0x%02x\n", t->base));
+ c->mode = LIT;
+ break;
+ }
+ if (e & 16) /* length */
+ {
+ c->sub.copy.get = e & 15;
+ c->len = t->base;
+ c->mode = LENEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = t->next;
+ break;
+ }
+ if (e & 32) /* end of block */
+ {
+ Tracevv(( "inflate: end of block\n"));
+ c->mode = WASH;
+ break;
+ }
+ c->mode = BADCODE; /* invalid code */
+ z->msg = "invalid literal/length code";
+ r = Z_DATA_ERROR;
+ LEAVE
+ case LENEXT: /* i: getting length extra (have base) */
+ j = c->sub.copy.get;
+ NEEDBITS(j)
+ c->len += (uInt)b & inflate_mask[j];
+ DUMPBITS(j)
+ c->sub.code.need = c->dbits;
+ c->sub.code.tree = c->dtree;
+ Tracevv(( "inflate: length %u\n", c->len));
+ c->mode = DIST;
+ case DIST: /* i: get distance next */
+ j = c->sub.code.need;
+ NEEDBITS(j)
+ t = c->sub.code.tree + ((uInt)b & inflate_mask[j]);
+ DUMPBITS(t->bits)
+ e = (uInt)(t->exop);
+ if (e & 16) /* distance */
+ {
+ c->sub.copy.get = e & 15;
+ c->sub.copy.dist = t->base;
+ c->mode = DISTEXT;
+ break;
+ }
+ if ((e & 64) == 0) /* next table */
+ {
+ c->sub.code.need = e;
+ c->sub.code.tree = t->next;
+ break;
+ }
+ c->mode = BADCODE; /* invalid code */
+ z->msg = "invalid distance code";
+ r = Z_DATA_ERROR;
+ LEAVE
+ case DISTEXT: /* i: getting distance extra */
+ j = c->sub.copy.get;
+ NEEDBITS(j)
+ c->sub.copy.dist += (uInt)b & inflate_mask[j];
+ DUMPBITS(j)
+ Tracevv(( "inflate: distance %u\n", c->sub.copy.dist));
+ c->mode = COPY;
+ case COPY: /* o: copying bytes in window, waiting for space */
+#ifndef __TURBOC__ /* Turbo C bug for following expression */
+ f = (uInt)(q - s->window) < c->sub.copy.dist ?
+ s->end - (c->sub.copy.dist - (q - s->window)) :
+ q - c->sub.copy.dist;
+#else
+ f = q - c->sub.copy.dist;
+ if ((uInt)(q - s->window) < c->sub.copy.dist)
+ f = s->end - (c->sub.copy.dist - (q - s->window));
+#endif
+ while (c->len)
+ {
+ NEEDOUT
+ OUTBYTE(*f++)
+ if (f == s->end)
+ f = s->window;
+ c->len--;
+ }
+ c->mode = START;
+ break;
+ case LIT: /* o: got literal, waiting for output space */
+ NEEDOUT
+ OUTBYTE(c->sub.lit)
+ c->mode = START;
+ break;
+ case WASH: /* o: got eob, possibly more output */
+ FLUSH
+ if (s->read != s->write)
+ LEAVE
+ c->mode = END;
+ case END:
+ r = Z_STREAM_END;
+ LEAVE
+ case BADCODE: /* x: got error */
+ r = Z_DATA_ERROR;
+ LEAVE
+ default:
+ r = Z_STREAM_ERROR;
+ LEAVE
+ }
+}
+
+local void inflate_codes_free(c, z)
+inflate_codes_statef *c;
+z_stream *z;
+{
+ ZFREE(z, c, sizeof(struct inflate_codes_state));
+ Tracev(( "inflate: codes free\n"));
+}
+
+/*+++++*/
+/* inflate_util.c -- data and routines common to blocks and codes
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* copy as much as possible from the sliding window to the output area */
+local int inflate_flush(s, z, r)
+inflate_blocks_statef *s;
+z_stream *z;
+int r;
+{
+ uInt n;
+ Bytef *p, *q;
+
+ /* local copies of source and destination pointers */
+ p = z->next_out;
+ q = s->read;
+
+ /* compute number of bytes to copy as far as end of window */
+ n = (uInt)((q <= s->write ? s->write : s->end) - q);
+ if (n > z->avail_out) n = z->avail_out;
+ if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+ /* update counters */
+ z->avail_out -= n;
+ z->total_out += n;
+
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ s->check = (*s->checkfn)(s->check, q, n);
+
+ /* copy as far as end of window */
+ zmemcpy(p, q, n);
+ p += n;
+ q += n;
+
+ /* see if more to copy at beginning of window */
+ if (q == s->end)
+ {
+ /* wrap pointers */
+ q = s->window;
+ if (s->write == s->end)
+ s->write = s->window;
+
+ /* compute bytes to copy */
+ n = (uInt)(s->write - q);
+ if (n > z->avail_out) n = z->avail_out;
+ if (n && r == Z_BUF_ERROR) r = Z_OK;
+
+ /* update counters */
+ z->avail_out -= n;
+ z->total_out += n;
+
+ /* update check information */
+ if (s->checkfn != Z_NULL)
+ s->check = (*s->checkfn)(s->check, q, n);
+
+ /* copy */
+ zmemcpy(p, q, n);
+ p += n;
+ q += n;
+ }
+
+ /* update pointers */
+ z->next_out = p;
+ s->read = q;
+
+ /* done */
+ return r;
+}
+
+/*+++++*/
+/* inffast.c -- process literals and length/distance pairs fast
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* simplify the use of the inflate_huft type with some defines */
+#define base more.Base
+#define next more.Next
+#define exop word.what.Exop
+#define bits word.what.Bits
+
+/* macros for bit input with no checking and for returning unused bytes */
+#define GRABBITS(j) {while(k<(j)){b|=((uLong)NEXTBYTE)<<k;k+=8;}}
+#define UNGRAB {n+=(c=k>>3);p-=c;k&=7;}
+
+/* Called with number of bytes left to write in window at least 258
+ (the maximum string length) and number of input bytes available
+ at least ten. The ten bytes are six bytes for the longest length/
+ distance pair plus four bytes for overloading the bit buffer. */
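+
+/* The six-byte figure is the worst case grabbed below: up to 20 bits for
+   the literal/length code including its extra bits, 15 bits for the
+   distance code, and up to 13 distance extra bits -- 48 bits in all --
+   with the remaining four bytes as slack for overloading the bit
+   buffer. */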
+
+local int inflate_fast(bl, bd, tl, td, s, z)
+uInt bl, bd;
+inflate_huft *tl, *td;
+inflate_blocks_statef *s;
+z_stream *z;
+{
+ inflate_huft *t; /* temporary pointer */
+ uInt e; /* extra bits or operation */
+ uLong b; /* bit buffer */
+ uInt k; /* bits in bit buffer */
+ Bytef *p; /* input data pointer */
+ uInt n; /* bytes available there */
+ Bytef *q; /* output window write pointer */
+ uInt m; /* bytes to end of window or read pointer */
+ uInt ml; /* mask for literal/length tree */
+ uInt md; /* mask for distance tree */
+ uInt c; /* bytes to copy */
+ uInt d; /* distance back to copy from */
+ Bytef *r; /* copy source pointer */
+
+ /* load input, output, bit values */
+ LOAD
+
+ /* initialize masks */
+ ml = inflate_mask[bl];
+ md = inflate_mask[bd];
+
+ /* do until not enough input or output space for fast loop */
+ do { /* assume called with m >= 258 && n >= 10 */
+ /* get literal/length code */
+ GRABBITS(20) /* max bits for literal/length code */
+ if ((e = (t = tl + ((uInt)b & ml))->exop) == 0)
+ {
+ DUMPBITS(t->bits)
+ Tracevv(( t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: * literal '%c'\n" :
+ "inflate: * literal 0x%02x\n", t->base));
+ *q++ = (Byte)t->base;
+ m--;
+ continue;
+ }
+ do {
+ DUMPBITS(t->bits)
+ if (e & 16)
+ {
+ /* get extra bits for length */
+ e &= 15;
+ c = t->base + ((uInt)b & inflate_mask[e]);
+ DUMPBITS(e)
+ Tracevv(( "inflate: * length %u\n", c));
+
+ /* decode distance base of block to copy */
+ GRABBITS(15); /* max bits for distance code */
+ e = (t = td + ((uInt)b & md))->exop;
+ do {
+ DUMPBITS(t->bits)
+ if (e & 16)
+ {
+ /* get extra bits to add to distance base */
+ e &= 15;
+ GRABBITS(e) /* get extra bits (up to 13) */
+ d = t->base + ((uInt)b & inflate_mask[e]);
+ DUMPBITS(e)
+ Tracevv(( "inflate: * distance %u\n", d));
+
+ /* do the copy */
+ m -= c;
+ if ((uInt)(q - s->window) >= d) /* offset before dest */
+ { /* just copy */
+ r = q - d;
+ *q++ = *r++; c--; /* minimum count is three, */
+ *q++ = *r++; c--; /* so unroll loop a little */
+ }
+ else /* else offset after destination */
+ {
+ e = d - (q - s->window); /* bytes from offset to end */
+ r = s->end - e; /* pointer to offset */
+ if (c > e) /* if source crosses, */
+ {
+ c -= e; /* copy to end of window */
+ do {
+ *q++ = *r++;
+ } while (--e);
+ r = s->window; /* copy rest from start of window */
+ }
+ }
+ do { /* copy all or what's left */
+ *q++ = *r++;
+ } while (--c);
+ break;
+ }
+ else if ((e & 64) == 0)
+ e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop;
+ else
+ {
+ z->msg = "invalid distance code";
+ UNGRAB
+ UPDATE
+ return Z_DATA_ERROR;
+ }
+ } while (1);
+ break;
+ }
+ if ((e & 64) == 0)
+ {
+ if ((e = (t = t->next + ((uInt)b & inflate_mask[e]))->exop) == 0)
+ {
+ DUMPBITS(t->bits)
+ Tracevv(( t->base >= 0x20 && t->base < 0x7f ?
+ "inflate: * literal '%c'\n" :
+ "inflate: * literal 0x%02x\n", t->base));
+ *q++ = (Byte)t->base;
+ m--;
+ break;
+ }
+ }
+ else if (e & 32)
+ {
+ Tracevv(( "inflate: * end of block\n"));
+ UNGRAB
+ UPDATE
+ return Z_STREAM_END;
+ }
+ else
+ {
+ z->msg = "invalid literal/length code";
+ UNGRAB
+ UPDATE
+ return Z_DATA_ERROR;
+ }
+ } while (1);
+ } while (m >= 258 && n >= 10);
+
+ /* not enough input or output--restore pointers and return */
+ UNGRAB
+ UPDATE
+ return Z_OK;
+}
+
+/*+++++*/
+/* zutil.c -- target dependent utility functions for the compression library
+ * Copyright (C) 1995 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: zutil.c,v 1.8 1995/05/03 17:27:12 jloup Exp */
+
+char *zlib_version = ZLIB_VERSION;
+
+char *z_errmsg[] = {
+"stream end", /* Z_STREAM_END 1 */
+"", /* Z_OK 0 */
+"file error", /* Z_ERRNO (-1) */
+"stream error", /* Z_STREAM_ERROR (-2) */
+"data error", /* Z_DATA_ERROR (-3) */
+"insufficient memory", /* Z_MEM_ERROR (-4) */
+"buffer error", /* Z_BUF_ERROR (-5) */
+""};
+
+/*+++++*/
+/* adler32.c -- compute the Adler-32 checksum of a data stream
+ * Copyright (C) 1995 Mark Adler
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: adler32.c,v 1.6 1995/05/03 17:27:08 jloup Exp */
+
+#define BASE 65521L /* largest prime smaller than 65536 */
+#define NMAX 5552
+/* NMAX is the largest n such that 255n(n+1)/2 + (n+1)(BASE-1) <= 2^32-1 */
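+/* check: for n == 5552 the bound is 255*5552*5553/2 + 5553*(BASE-1)
+   = 4294690200 <= 4294967295, while n == 5553 already exceeds it */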
+
+#define DO1(buf) {s1 += *buf++; s2 += s1;}
+#define DO2(buf) DO1(buf); DO1(buf);
+#define DO4(buf) DO2(buf); DO2(buf);
+#define DO8(buf) DO4(buf); DO4(buf);
+#define DO16(buf) DO8(buf); DO8(buf);
+
+/* ========================================================================= */
+uLong adler32(adler, buf, len)
+ uLong adler;
+ Bytef *buf;
+ uInt len;
+{
+ unsigned long s1 = adler & 0xffff;
+ unsigned long s2 = (adler >> 16) & 0xffff;
+ int k;
+
+ if (buf == Z_NULL) return 1L;
+
+ while (len > 0) {
+ k = len < NMAX ? len : NMAX;
+ len -= k;
+ while (k >= 16) {
+ DO16(buf);
+ k -= 16;
+ }
+ if (k != 0) do {
+ DO1(buf);
+ } while (--k);
+ s1 %= BASE;
+ s2 %= BASE;
+ }
+ return (s2 << 16) | s1;
+}
diff --git a/bsps/powerpc/motorola_powerpc/bootloader/zlib.h b/bsps/powerpc/motorola_powerpc/bootloader/zlib.h
new file mode 100644
index 0000000000..f778b3dcd5
--- /dev/null
+++ b/bsps/powerpc/motorola_powerpc/bootloader/zlib.h
@@ -0,0 +1,434 @@
+/*
+ * This file is derived from zlib.h and zconf.h from the zlib-0.95
+ * distribution by Jean-loup Gailly and Mark Adler, with some additions
+ * by Paul Mackerras to aid in implementing Deflate compression and
+ * decompression for PPP packets.
+ */
+
+/*
+ * ==FILEVERSION 960122==
+ *
+ * This marker is used by the Linux installation script to determine
+ * whether an up-to-date version of this file is already installed.
+ */
+
+/* zlib.h -- interface of the 'zlib' general purpose compression library
+ version 0.95, Aug 16th, 1995.
+
+ Copyright (C) 1995 Jean-loup Gailly and Mark Adler
+
+ This software is provided 'as-is', without any express or implied
+ warranty. In no event will the authors be held liable for any damages
+ arising from the use of this software.
+
+ Permission is granted to anyone to use this software for any purpose,
+ including commercial applications, and to alter it and redistribute it
+ freely, subject to the following restrictions:
+
+ 1. The origin of this software must not be misrepresented; you must not
+ claim that you wrote the original software. If you use this software
+ in a product, an acknowledgment in the product documentation would be
+ appreciated but is not required.
+ 2. Altered source versions must be plainly marked as such, and must not be
+ misrepresented as being the original software.
+ 3. This notice may not be removed or altered from any source distribution.
+
+ Jean-loup Gailly Mark Adler
+ gzip@prep.ai.mit.edu madler@alumni.caltech.edu
+ */
+
+#ifndef _ZLIB_H
+#define _ZLIB_H
+
+#define local
+#ifdef DEBUG_ZLIB
+#include <bsp/consoleIo.h>
+#define fprintf printk
+#endif
+
+/* #include "zconf.h" */ /* included directly here */
+
+/* zconf.h -- configuration of the zlib compression library
+ * Copyright (C) 1995 Jean-loup Gailly.
+ * For conditions of distribution and use, see copyright notice in zlib.h
+ */
+
+/* From: zconf.h,v 1.12 1995/05/03 17:27:12 jloup Exp */
+
+/*
+ The library does not install any signal handler. It is recommended to
+ add at least a handler for SIGSEGV when decompressing; the library checks
+ the consistency of the input data whenever possible but may go nuts
+ for some forms of corrupted input.
+ */
+
+/*
+ * Compile with -DMAXSEG_64K if the alloc function cannot allocate more
+ * than 64k bytes at a time (needed on systems with 16-bit int).
+ * Compile with -DUNALIGNED_OK if it is OK to access shorts or ints
+ * at addresses which are not a multiple of their size.
+ * Under DOS, -DFAR=far or -DFAR=__far may be needed.
+ */
+
+#ifndef STDC
+# if defined(MSDOS) || defined(__STDC__) || defined(__cplusplus)
+# define STDC
+# endif
+#endif
+
+#ifdef __MWERKS__ /* Metrowerks CodeWarrior declares fileno() in unix.h */
+# include <unix.h>
+#endif
+
+/* Maximum value for memLevel in deflateInit2 */
+#ifndef MAX_MEM_LEVEL
+# ifdef MAXSEG_64K
+# define MAX_MEM_LEVEL 8
+# else
+# define MAX_MEM_LEVEL 9
+# endif
+#endif
+
+#ifndef FAR
+# define FAR
+#endif
+
+/* Maximum value for windowBits in deflateInit2 and inflateInit2 */
+#ifndef MAX_WBITS
+# define MAX_WBITS 15 /* 32K LZ77 window */
+#endif
+
+/* The memory requirements for deflate are (in bytes):
+ 1 << (windowBits+2) + 1 << (memLevel+9)
+ that is: 128K for windowBits=15 + 128K for memLevel = 8 (default values)
+ plus a few kilobytes for small objects. For example, if you want to reduce
+ the default memory requirements from 256K to 128K, compile with
+ make CFLAGS="-O -DMAX_WBITS=14 -DMAX_MEM_LEVEL=7"
+ Of course this will generally degrade compression (there's no free lunch).
+
+ The memory requirements for inflate are (in bytes) 1 << windowBits
+ that is, 32K for windowBits=15 (default value) plus a few kilobytes
+ for small objects.
+*/
+
+ /* Type declarations */
+
+#ifndef OF /* function prototypes */
+# ifdef STDC
+# define OF(args) args
+# else
+# define OF(args) ()
+# endif
+#endif
+
+typedef unsigned char Byte; /* 8 bits */
+typedef unsigned int uInt; /* 16 bits or more */
+typedef unsigned long uLong; /* 32 bits or more */
+
+typedef Byte FAR Bytef;
+typedef char FAR charf;
+typedef int FAR intf;
+typedef uInt FAR uIntf;
+typedef uLong FAR uLongf;
+
+#ifdef STDC
+ typedef void FAR *voidpf;
+ typedef void *voidp;
+#else
+ typedef Byte FAR *voidpf;
+ typedef Byte *voidp;
+#endif
+
+/* end of original zconf.h */
+
+#define ZLIB_VERSION "0.95P"
+
+/*
+ The 'zlib' compression library provides in-memory compression and
+ decompression functions, including integrity checks of the uncompressed
+ data. This version of the library supports only one compression method
+ (deflation) but other algorithms may be added later and will have the same
+ stream interface.
+
+ For compression the application must provide the output buffer and
+ may optionally provide the input buffer for optimization. For decompression,
+ the application must provide the input buffer and may optionally provide
+ the output buffer for optimization.
+
+ Compression can be done in a single step if the buffers are large
+ enough (for example if an input file is mmap'ed), or can be done by
+ repeated calls of the compression function. In the latter case, the
+ application must provide more input and/or consume the output
+ (providing more output space) before each call.
+*/
+
+typedef voidpf (*alloc_func) OF((voidpf opaque, uInt items, uInt size));
+typedef void (*free_func) OF((voidpf opaque, voidpf address, uInt nbytes));
+
+struct internal_state;
+
+typedef struct z_stream_s {
+ Bytef *next_in; /* next input byte */
+ uInt avail_in; /* number of bytes available at next_in */
+ uLong total_in; /* total nb of input bytes read so far */
+
+ Bytef *next_out; /* next output byte should be put there */
+ uInt avail_out; /* remaining free space at next_out */
+ uLong total_out; /* total nb of bytes output so far */
+
+ char *msg; /* last error message, NULL if no error */
+ struct internal_state FAR *state; /* not visible by applications */
+
+ alloc_func zalloc; /* used to allocate the internal state */
+ free_func zfree; /* used to free the internal state */
+ voidp opaque; /* private data object passed to zalloc and zfree */
+
+ Byte data_type; /* best guess about the data type: ascii or binary */
+
+} z_stream;
+
+/*
+ The application must update next_in and avail_in when avail_in has
+ dropped to zero. It must update next_out and avail_out when avail_out
+ has dropped to zero. The application must initialize zalloc, zfree and
+ opaque before calling the init function. All other fields are set by the
+ compression library and must not be updated by the application.
+
+ The opaque value provided by the application will be passed as the first
+ parameter for calls of zalloc and zfree. This can be useful for custom
+ memory management. The compression library attaches no meaning to the
+ opaque value.
+
+ zalloc must return Z_NULL if there is not enough memory for the object.
+ On 16-bit systems, the functions zalloc and zfree must be able to allocate
+ exactly 65536 bytes, but will not be required to allocate more than this
+ if the symbol MAXSEG_64K is defined (see zconf.h). WARNING: On MSDOS,
+ pointers returned by zalloc for objects of exactly 65536 bytes *must*
+ have their offset normalized to zero. The default allocation function
+ provided by this library ensures this (see zutil.c). To reduce memory
+ requirements and avoid any allocation of 64K objects, at the expense of
+ compression ratio, compile the library with -DMAX_WBITS=14 (see zconf.h).
+
+ The fields total_in and total_out can be used for statistics or
+ progress reports. After compression, total_in holds the total size of
+ the uncompressed data and may be saved for use in the decompressor
+ (particularly if the decompressor wants to decompress everything in
+ a single step).
+*/
+
+ /* constants */
+
+#define Z_NO_FLUSH 0
+#define Z_PARTIAL_FLUSH 1
+#define Z_FULL_FLUSH 2
+#define Z_SYNC_FLUSH 3 /* experimental: partial_flush + byte align */
+#define Z_FINISH 4
+#define Z_PACKET_FLUSH 5
+/* See inflate() below for the usage of these constants; the deflate side
+   of the library is not included in this copy */
+
+#define Z_OK 0
+#define Z_STREAM_END 1
+#define Z_ERRNO (-1)
+#define Z_STREAM_ERROR (-2)
+#define Z_DATA_ERROR (-3)
+#define Z_MEM_ERROR (-4)
+#define Z_BUF_ERROR (-5)
+/* error codes for the compression/decompression functions */
+
+#define Z_BEST_SPEED 1
+#define Z_BEST_COMPRESSION 9
+#define Z_DEFAULT_COMPRESSION (-1)
+/* compression levels */
+
+#define Z_FILTERED 1
+#define Z_HUFFMAN_ONLY 2
+#define Z_DEFAULT_STRATEGY 0
+
+#define Z_BINARY 0
+#define Z_ASCII 1
+#define Z_UNKNOWN 2
+/* Used to set the data_type field */
+
+#define Z_NULL 0 /* for initializing zalloc, zfree, opaque */
+
+extern char *zlib_version;
+/* The application can compare zlib_version and ZLIB_VERSION for consistency.
+ If the first character differs, the library code actually used is
+ not compatible with the zlib.h header file used by the application.
+ */
+
+ /* basic functions */
+
+extern int inflateInit OF((z_stream *strm));
+/*
+ Initializes the internal stream state for decompression. The fields
+ zalloc and zfree must be initialized before by the caller. If zalloc and
+ zfree are set to Z_NULL, inflateInit updates them to use default allocation
+ functions.
+
+ inflateInit returns Z_OK if success, Z_MEM_ERROR if there was not
+ enough memory. msg is set to null if there is no error message.
+ inflateInit does not perform any decompression: this will be done by
+ inflate().
+*/
+
+extern int inflate OF((z_stream *strm, int flush));
+/*
+ Performs one or both of the following actions:
+
+ - Decompress more input starting at next_in and update next_in and avail_in
+ accordingly. If not all input can be processed (because there is not
+ enough room in the output buffer), next_in is updated and processing
+ will resume at this point for the next call of inflate().
+
+ - Provide more output starting at next_out and update next_out and avail_out
+ accordingly. inflate() always provides as much output as possible
+ (until there is no more input data or no more space in the output buffer).
+
+ Before the call of inflate(), the application should ensure that at least
+ one of the actions is possible, by providing more input and/or consuming
+ more output, and updating the next_* and avail_* values accordingly.
+ The application can consume the uncompressed output when it wants, for
+ example when the output buffer is full (avail_out == 0), or after each
+ call of inflate().
+
+ If the parameter flush is set to Z_PARTIAL_FLUSH or Z_PACKET_FLUSH,
+ inflate flushes as much output as possible to the output buffer. The
+ flushing behavior of inflate is not specified for values of the flush
+ parameter other than Z_PARTIAL_FLUSH, Z_PACKET_FLUSH or Z_FINISH, but the
+ current implementation actually flushes as much output as possible
+ anyway. For Z_PACKET_FLUSH, inflate checks that once all the input data
+ has been consumed, it is expecting to see the length field of a stored
+ block; if not, it returns Z_DATA_ERROR.
+
+ inflate() should normally be called until it returns Z_STREAM_END or an
+ error. However if all decompression is to be performed in a single step
+ (a single call of inflate), the parameter flush should be set to
+ Z_FINISH. In this case all pending input is processed and all pending
+ output is flushed; avail_out must be large enough to hold all the
+ uncompressed data. (The size of the uncompressed data may have been saved
+ by the compressor for this purpose.) The next operation on this stream must
+ be inflateEnd to deallocate the decompression state. The use of Z_FINISH
+ is never required, but can be used to inform inflate that a faster routine
+ may be used for the single inflate() call.
+
+ inflate() returns Z_OK if some progress has been made (more input
+ processed or more output produced), Z_STREAM_END if the end of the
+ compressed data has been reached and all uncompressed output has been
+ produced, Z_DATA_ERROR if the input data was corrupted, Z_STREAM_ERROR if
+ the stream structure was inconsistent (for example if next_in or next_out
+ was NULL), Z_MEM_ERROR if there was not enough memory, Z_BUF_ERROR if no
+ progress is possible or if there was not enough room in the output buffer
+ when Z_FINISH is used. In the Z_DATA_ERROR case, the application may then
+ call inflateSync to look for a good compression block. */
+
+extern int inflateEnd OF((z_stream *strm));
+/*
+ All dynamically allocated data structures for this stream are freed.
+ This function discards any unprocessed input and does not flush any
+ pending output.
+
+ inflateEnd returns Z_OK if success, Z_STREAM_ERROR if the stream state
+ was inconsistent. In the error case, msg may be set but then points to a
+ static string (which must not be deallocated).
+*/
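+
+/*
+   A minimal decompression loop over the three basic functions might look
+   like the following sketch, where fill_input() and use_output() stand in
+   for application code and error handling is abbreviated:
+
+     z_stream z;
+     int r;
+
+     z.zalloc = Z_NULL; z.zfree = Z_NULL; z.opaque = Z_NULL;
+     z.next_in = inbuf; z.avail_in = 0;
+     if (inflateInit(&z) != Z_OK) error();
+     do {
+         if (z.avail_in == 0) {
+             z.next_in = inbuf;
+             z.avail_in = fill_input(inbuf);    number of bytes read
+         }
+         z.next_out = outbuf;
+         z.avail_out = sizeof(outbuf);
+         r = inflate(&z, Z_NO_FLUSH);
+         use_output(outbuf, sizeof(outbuf) - z.avail_out);
+     } while (r == Z_OK);
+     if (r != Z_STREAM_END) error();
+     inflateEnd(&z);
+*/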
+
+ /* advanced functions */
+
+extern int inflateInit2 OF((z_stream *strm,
+ int windowBits));
+/*
+ This is another version of inflateInit with more compression options. The
+ fields next_out, zalloc and zfree must be initialized before by the caller.
+
+ The windowBits parameter is the base two logarithm of the maximum window
+ size (the size of the history buffer). It should be in the range 8..15 for
+ this version of the library (the value 16 will be allowed soon). The
+ default value is 15 if inflateInit is used instead. If a compressed stream
+ with a larger window size is given as input, inflate() will return with
+ the error code Z_DATA_ERROR instead of trying to allocate a larger window.
+
+ If next_out is not null, the library will use this buffer for the history
+ buffer; the buffer must either be large enough to hold the entire output
+ data, or have at least 1<<windowBits bytes. If next_out is null, the
+ library will allocate its own buffer (and leave next_out null). next_in
+ need not be provided here but must be provided by the application for the
+ next call of inflate().
+
+ If the history buffer is provided by the application, next_out must
+ never be changed by the application since the decompressor maintains
+ history information inside this buffer from call to call; the application
+ can only reset next_out to the beginning of the history buffer when
+ avail_out is zero and all output has been consumed.
+
+ inflateInit2 returns Z_OK on success, Z_MEM_ERROR if there was
+ not enough memory, Z_STREAM_ERROR if a parameter is invalid (such as
+ windowBits < 8). msg is set to null if there is no error message.
+ inflateInit2 does not perform any decompression: this will be done by
+ inflate().
+*/
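+
+/*
+  Illustrative sketch (not part of the original header, compiled out):
+  initializing with an application-supplied history buffer as described
+  above.  The buffer must hold at least 1 << windowBits bytes, and the
+  application must not move next_out afterwards.  Setting avail_out to
+  the buffer size here is an assumption, not spelled out above.
+*/
+#if 0
+static Bytef history[1 << 15];      /* >= 1 << windowBits bytes */
+
+static int init_with_history(z_stream *strm)
+{
+    strm->zalloc    = Z_NULL;
+    strm->zfree     = Z_NULL;
+    strm->next_out  = history;      /* library keeps its window here */
+    strm->avail_out = sizeof history;
+    return inflateInit2(strm, 15);  /* 32 KB maximum window size */
+}
+#endif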
+
+extern int inflateSync OF((z_stream *strm));
+/*
+ Skips invalid compressed data until the special marker (see deflate()
+ above) can be found, or until all available input is skipped. No output
+ is provided.
+
+ inflateSync returns Z_OK if the special marker has been found, Z_BUF_ERROR
+ if no more input was provided, Z_DATA_ERROR if no marker has been found,
+ or Z_STREAM_ERROR if the stream structure was inconsistent. In the success
+ case, the application may save the current value of total_in, which
+ indicates where valid compressed data was found. In the error case, the
+ application may repeatedly call inflateSync, providing more input each time,
+ until success or end of the input data.
+*/
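+
+/*
+  Illustrative sketch (not part of the original header, compiled out):
+  the resynchronization loop described above, run after inflate() has
+  returned Z_DATA_ERROR.  refill() is a hypothetical helper returning
+  the number of new input bytes, or 0 at end of input.
+*/
+#if 0
+extern uInt refill(Bytef *buf, uInt size);  /* hypothetical */
+
+static int resync(z_stream *strm, Bytef *buf, uInt bufsize)
+{
+    int err = Z_DATA_ERROR;
+
+    while (err == Z_DATA_ERROR || err == Z_BUF_ERROR) {
+        strm->next_in  = buf;
+        strm->avail_in = refill(buf, bufsize);
+        if (strm->avail_in == 0)
+            break;                  /* input exhausted, no marker found */
+        err = inflateSync(strm);
+    }
+    return err;  /* on Z_OK, total_in locates the valid compressed data */
+}
+#endif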
+
+extern int inflateReset OF((z_stream *strm));
+/*
+ This function is equivalent to inflateEnd followed by inflateInit,
+ but does not free and reallocate all the internal decompression state.
+ The stream will keep attributes that may have been set by inflateInit2.
+
+ inflateReset returns Z_OK on success, or Z_STREAM_ERROR if the source
+ stream state was inconsistent (such as zalloc or state being NULL).
+*/
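+
+/*
+  Illustrative sketch (not part of the original header, compiled out):
+  reusing one stream for a sequence of independent compressed packets
+  by resetting it between packets instead of tearing the state down.
+*/
+#if 0
+static int start_next_packet(z_stream *strm)
+{
+    /* keeps allocations and any windowBits set by inflateInit2 */
+    return inflateReset(strm);
+}
+#endif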
+
+extern int inflateIncomp OF((z_stream *strm));
+/*
+ This function adds the data at next_in (avail_in bytes) to the output
+ history without performing any output. There must be no pending output,
+ and the decompressor must be expecting to see the start of a block.
+ Calling this function is equivalent to decompressing a stored block
+ containing the data at next_in (except that the data is not output).
+*/
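+
+/*
+  Illustrative sketch (not part of the original header, compiled out):
+  recording a packet that was transmitted uncompressed, as a PPP-style
+  driver might, so that the decompressor's history stays in sync with
+  the compressor's.  pkt and pkt_len are hypothetical.
+*/
+#if 0
+static int record_incompressible(z_stream *strm, Bytef *pkt, uInt pkt_len)
+{
+    strm->next_in  = pkt;
+    strm->avail_in = pkt_len;
+    return inflateIncomp(strm);  /* updates history, emits no output */
+}
+#endif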
+
+ /* checksum functions */
+
+/*
+ This function is not related to compression but is exported
+ anyway because it might be useful in applications using the
+ compression library.
+*/
+
+extern uLong adler32 OF((uLong adler, Bytef *buf, uInt len));
+
+/*
+ Update a running Adler-32 checksum with the bytes buf[0..len-1] and
+ return the updated checksum. If buf is NULL, this function returns
+ the required initial value for the checksum.
+ An Adler-32 checksum is almost as reliable as a CRC32 but can be computed
+ much faster. Usage example:
+
+ uLong adler = adler32(0L, Z_NULL, 0);
+
+ while (read_buffer(buffer, length) != EOF) {
+ adler = adler32(adler, buffer, length);
+ }
+ if (adler != original_adler) error();
+*/
+
+#ifndef _Z_UTIL_H
+ struct internal_state {int dummy;}; /* hack for buggy compilers */
+#endif
+
+#endif /* _ZLIB_H */
diff --git a/bsps/powerpc/motorola_powerpc/dev/console.inl b/bsps/powerpc/motorola_powerpc/console/console.inl
index 177444e870..177444e870 100644
--- a/bsps/powerpc/motorola_powerpc/dev/console.inl
+++ b/bsps/powerpc/motorola_powerpc/console/console.inl
diff --git a/bsps/powerpc/motorola_powerpc/dev/keyboard.h b/bsps/powerpc/motorola_powerpc/console/keyboard.h
index 0a38d0136d..0a38d0136d 100644
--- a/bsps/powerpc/motorola_powerpc/dev/keyboard.h
+++ b/bsps/powerpc/motorola_powerpc/console/keyboard.h
diff --git a/bsps/powerpc/motorola_powerpc/dev/polled_io.c b/bsps/powerpc/motorola_powerpc/console/polled_io.c
index c718459f13..c718459f13 100644
--- a/bsps/powerpc/motorola_powerpc/dev/polled_io.c
+++ b/bsps/powerpc/motorola_powerpc/console/polled_io.c