From acc25eec35e186abc118b9ca4f097e22fc6b4846 Mon Sep 17 00:00:00 2001 From: Joel Sherrill Date: Thu, 2 Dec 1999 14:31:19 +0000 Subject: Merged of mcp750 and mvme2307 BSP by Eric Valette . As part of this effort, the mpc750 libcpu code is now shared with the ppc6xx. --- .../libbsp/powerpc/shared/bootloader/em86real.S | 4561 ++++++++++++++++++++ 1 file changed, 4561 insertions(+) create mode 100644 c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S (limited to 'c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S') diff --git a/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S b/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S new file mode 100644 index 0000000000..a462cf7bdb --- /dev/null +++ b/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S @@ -0,0 +1,4561 @@ +/* + * em86real.S + * + * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es + * + * Modified to compile in RTEMS development environment + * by Eric Valette + * + * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr + * + * The license and distribution terms for this file may be + * found in found in the file LICENSE in this distribution or at + * http://www.OARcorp.com/rtems/license.html. + * + * $Id$ + */ + +/* If the symbol __BOOT__ is defined, a slightly different version is + * generated to be compiled with the -m relocatable option + */ + +#ifdef __BOOT__ +#include "bootldr.h" +/* It is impossible to gather statistics in the boot version */ +#undef EIP_STATS +#endif + +/* + * + * Given the size of this code, it deserves a few comments on how it works, + * and why it was implemented the way it is. + * + * The goal is to have a real mode i486SX emulator to initialize hardware, + * mostly graphics boards, by interpreting ROM BIOSes. The choice of a 486SX + * is logical since this is the lowest processor that PCI ROM BIOSes must run + * on. + * + * The goal of this emulator is not performance, but a small enough memory + * footprint to include it in a bootloader. 
+ * + * It is actually likely to be comparable to a 25MHz 386DX on a 200MHz 603e ! + * This is not as serious as it seems since most of the BIOS code performs + * a lot of accesses to I/O and non-cacheable memory spaces. For such + * instructions, the execution time is often dominated by bus accesses. + * Statistics of the code also show that it spends a large fraction of + * the time in loops waiting for vertical retrace or programs one of the + * timers and waits for the count to go down to zero. This type of loop + * runs emulated at the same speed as on 5 GHz Pentium IV++ ;) + * + */ + +/* + * Known bugs or differences with a real 486SX (real mode): + * - segment limits are not enforced (too costly) + * - xchg instructions with memory are not locked + * - lock prefixes are not implemented at all + * - long divides implemented but perhaps still buggy + * - miscellaneous system instructions not implemented + * (some probably cannot be implemented) + * - neither control nor debug registers are implemented for the time being + * (debug registers are impossible to implement at a reasonable cost) + */ + +/* Code options, put them on the compiler command line */ +/* #define EIP_STATS */ /* EIP based profiling */ +/* #undef EIP_STATS */ + +/* + * Implementation notes: + * + * A) flags emulation. + * + * The most important decisions when it comes to obtaining a reasonable speed + * are related to how the EFLAGS register is emulated. + * + * Note: the code to set up flags is complex, but it is only seldom + * executed since cmp and test instructions use much faster flag evaluation + * paths. For example the overflow flag is almost only needed for pushf and + * int. Comparison results only involve (SF^OF) or (SF^OF)+ZF and the + * implementation is fast in this case. + * + * Rarely used flags: AC, NT and IOPL are kept in a memory EFLAGS image. 
+ * All other flags are either kept explicitly in PPC cr (DF, IF, and TF) or + * lazily evaluated from the state of 4 registers called flags, result, op1, + * op2, and sometimes the cr itself. The emulation has been designed for + * minimal overhead for the common case where the flags are never used. With + * few exceptions, all instructions that set flags leave the result of the + * computation in a register called result, and operands are taken from op1 + * and op2 registers. However a few instructions like cmp, test and bit tests + * (bt/btc/btr/bts/bsf/bsr) explicitly set cr bits to short circuit + * condition code evaluation of conditional instructions. + * + * As a very brief summary: + * + * - the result of the last flag setting operation is often either in the + * result register or in op2 after increment or decrement instructions + * because result and op1 may be needed to compute the carry. + * + * - compare instruction leave the result of the unsigned comparison + * in cr4 and of signed comparison in cr6. This means that: + * - cr4[0]=CF (short circuit for jc/jnc) + * - cr4[1]=~(CF+ZF) (short circuit for ja/jna) + * - cr6[0]=(OF^SF) (short circuit for jl/jnl) + * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng) + * - cr6[2]=ZF (short circuit for jz/jnz) + * + * - test instruction set flags in cr6 and clear overflow. 
This means that: + * - cr6[0]=SF=(SF^OF) (short circuit for jl/jnl/js/jns) + * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng) + * - cr6[2]=ZF (short circuit for jz/jnz) + * + * All flags may be lazily evaluated from several values kept in registers: + * + * Flag: Depends upon: + * OF result, op1, op2, flags[INCDEC_FIELD,SUBTRACTING,OF_STATE_MASK] + * SF result, op2, flags[INCDEC_FIELD,RES_SIZE] + * ZF result, op2, cr6[2], flags[INCDEC_FIELD,RES_SIZE,ZF_PROTECT] + * AF op1, op2, flags[INCDEC_FIELD,SUBTRACTING,CF_IN] + * PF result, op2, flags[INCDEC_FIELD] + * CF result, op1, flags[CF_STATE_MASK, CF_IN] + * + * The order of the fields in the flags register has been chosen so that a + * single rlwimi is necessary for common instructions that do not affect all + * flags. (See the code for inc/dec emulation). + * + * + * B) opcodes and prefixes. + * + * The register called opcode holds in its low order 8 bits the opcode + * (second byte if the first byte is 0x0f). More precisely it holds the + * last byte fetched before the modrm byte or the immediate operand(s) + * of the instruction, if any. High order 24 bits are zero unless the + * instruction has prefixes. These higher order bits have the following + * meaning: + * 0x80000000 segment override prefix + * 0x00001000 repnz prefix (0xf2) + * 0x00000800 repz prefix (0xf3) + * 0x00000400 address size prefix (0x67) + * 0x00000200 operand size prefix (0x66) + * (bit 0x1000 and 0x800 cannot be set simultaneously) + * + * Therefore if there is a segment override the value will be very + * negative (between 0x80000000 and 0x800016ff), if there is no segment + * override, the value will be between 0 and 0x16ff. The reason for + * this choice will be understood in the next part. + * + * C) addressing mode description tables. + * + * The encoding of the modrm bytes (especially in 16 bit mode) is quite + * complex. Hence a table, indexed by the five useful bits of the modrm + * byte is used to simplify decoding. 
Here is a description: + * + * bit mask meaning + * 0x80000000 use ss as default segment register + * 0x00004000 means that this addressing mode needs a base register + * (set for all entries except sib and displacement-only) + * 0x00002000 set if preceding is not set + * 0x00001000 set if an sib follows + * 0x00000700 base register to use (16 and 32 bit) + * 0x00000080 set in 32 bit addressing mode table, cleared in 16 bit + * (so extsb mask,entry; ori mask,mask,0xffff gives a mask) + * 0x00000070 kludge field, possible values are + * 0: 16 bit addressing mode without index + * 10: 32 bit addressing mode + * 60: 16 bit addressing mode with %si as index + * 70: 16 bit addressing mode with %di as index + * + * This convention leads to the following special values used to check for + * sib present and displacement-only, which happen to be the three lowest + * values in the table (unsigned): + * 0x00003090 sib follows (implies it is a 32 bit mode) + * 0x00002090 32 bit displacement-only + * 0x00002000 16 bit displacement-only + * + * This means that all entries are either very negative in the 0x80002000 + * range if the segment defaults to ss or at least 0x2000 if it defaults + * to ds. Combined with the value in opcode this gives the following table: + * opcode entry entry>opcode ? segment to use + * positive positive yes ds (default) + * negative positive yes overridden by prefix + * positive negative no ss + * negative negative yes overridden by prefix + * + * Hence a simple comparison allows to check for the need to override + * the current base with ss, i.e., when ss is the default base and the + * instruction has no override prefix. + * + * D) BUGS + * + * This software is obviously bug-free :-). Nevertheless, if you encounter + * an interesting feature, mail me a note, if possible with a detailed + * instruction example showing where and how it fails. 
+ * + */ + + +/* Now the details of flag evaluation with the necessary macros */ + +/* Alignment check is toggable so the system believes it is a 486, but +CPUID is not to avoid unnecessary complexities. However, alignment +is actually never checked (real mode is CPL 0 anyway). */ +#define AC86 13 /* Can only be toggled */ +#define VM86 14 /* Not used for now */ +#define RF86 15 /* Not emulated precisely */ +/* Actually NT and IOPL are kept in memory */ +#define NT86 17 +#define IOPL86 18 /* Actually 18 and 19 */ +#define OF86 20 +#define DF86 21 +#define IF86 22 +#define TF86 23 +#define SF86 24 +#define ZF86 25 +#define AF86 27 +#define PF86 29 +#define CF86 31 + +/* Where the less important flags are placed in PPC cr */ +#define RF 20 /* Suppress trap flag: cr5[0] */ +#define DF 21 /* Direction flag: cr5[1] */ +#define IF 22 /* Interrupt flag: cr5[2] */ +#define TF 23 /* Single step flag: cr5[3] */ + +/* Now the flags which are frequently used */ +/* + * CF_IN is a copy of the input carry with PPC polarity, + * it is cleared for add, set for sub and cmp, + * equal to the x86 carry for adc and to its complement for sbb. + * it is used to evaluate AF and CF. + */ +#define CF_IN 0x80000000 + +/* #define GET_CF_IN(dst) rlwinm dst,flags,1,0x01 */ + +/* CF_IN_CR set in flags means that cr4[0] is a copy of carry bit */ +#define CF_IN_CR 0x40000000 + +#define EVAL_CF andis. r3,flags,(CF_IN_CR)>>16; beql- _eval_cf + +/* + * CF_STATE tells how to compute the carry bit. + * NOTRESULT16 and NOTRESULT8 are never set explicitly, + * but they may happen after a cmc instruction. 
+ */ +#define CF 16 /* cr4[0] */ +#define CF_LOCATION 0x30000000 +#define CF_ZERO 0x00000000 +#define CF_EXPLICIT 0x00000000 +#define CF_COMPLEMENT 0x08000000 /* Indeed a polarity bit */ +#define CF_STATE_MASK (CF_LOCATION|CF_COMPLEMENT) +#define CF_VALUE 0x08000000 +#define CF_SET 0x08000000 +#define CF_RES32 0x10000000 +#define CF_NOTRES32 0x18000000 +#define CF_RES16 0x20000000 +#define CF_NOTRES16 0x28000000 +#define CF_RES8 0x30000000 +#define CF_NOTRES8 0x38000000 + +#define CF_ADDL CF_RES32 +#define CF_SUBL CF_NOTRES32 +#define CF_ADDW CF_RES16 +#define CF_SUBW CF_RES16 +#define CF_ADDB CF_RES8 +#define CF_SUBB CF_RES8 + +#define CF_ROTCNT(dst) rlwinm dst,flags,7,0x18 +#define CF_POL(dst,pos) rlwinm dst,flags,(36-pos)%32,pos,pos +#define CF_POL_INSERT(dst,pos) \ + rlwimi dst,flags,(36-pos)%32,pos,pos +#define RES2CF(dst) rlwinm dst,result,8,7,15 + +/* + * OF_STATE tells how to compute the overflow bit. When the low order bit + * is set (OF_EXPLICIT), it means that OF is the exclusive or of the + * two other bits. For the reason of this choice, see rotate instructions. + */ +#define OF 1 /* Only after EVAL_OF */ +#define OF_STATE_MASK 0x07000000 +#define OF_INCDEC 0x00000000 +#define OF_EXPLICIT 0x01000000 +#define OF_ZERO 0x01000000 +#define OF_VALUE 0x04000000 +#define OF_SET 0x04000000 +#define OF_ONE 0x05000000 +#define OF_XOR 0x06000000 +#define OF_ARITHL 0x06000000 +#define OF_ARITHW 0x02000000 +#define OF_ARITHB 0x04000000 + +#define EVAL_OF rlwinm. r3,flags,6,0,1; bngl+ _eval_of; andis. r3,flags,OF_VALUE>>16 + +/* See _eval_of to see how this can be used */ +#define OF_ROTCNT(dst) rlwinm dst,flags,10,0x1c + +/* + * SIGNED_IN_CR means that cr6 is set as after a signed compare: + * - cr6[0] is SF^OF for jl/jnl/setl/setnl... + * - cr6[1] is ~((SF^OF)+ZF) for jg/jng/setg/setng... 
+ * - cr6[2] is ZF (ZF_IN_CR is always set if this bit is set) + */ +#define SLT 24 /* cr6[0], signed less than */ +#define SGT 25 /* cr6[1], signed greater than */ +#define SIGNED_IN_CR 0x00800000 + +#define EVAL_SIGNED andis. r3,flags,SIGNED_IN_CR>>16; beql- _eval_signed + +/* + * Above in CR means that cr4 is set as after an unsigned compare: + * - cr4[0] is CF (CF_IN_CR is also set) + * - cr4[1] is ~(CF+ZF) (ZF_IN_CR is also set) + */ +#define ABOVE 17 /* cr4[1] */ +#define ABOVE_IN_CR 0x00400000 + +#define EVAL_ABOVE andis. r3,flags,ABOVE_IN_CR>>16; beql- _eval_above + +/* SF_IN_CR means cr6[0] is a copy of SF. It implies ZF_IN_CR is also set */ +#define SF 24 /* cr6[0] */ +#define SF_IN_CR 0x00200000 + +#define EVAL_SF andis. r3,flags,SF_IN_CR>>16; beql- _eval_sf_zf + +/* ZF_IN_CR means cr6[2] is a copy of ZF. */ +#define ZF 26 +#define ZF_IN_CR 0x00100000 + +#define EVAL_ZF andis. r3,flags,ZF_IN_CR>>16; beql- _eval_sf_zf +#define ZF2ZF86(s,d) rlwimi d,s,ZF-ZF86,ZF86,ZF86 +#define ZF862ZF(reg) rlwimi reg,reg,32+ZF86-ZF,ZF,ZF + +/* + * ZF_PROTECT means cr6[2] is the only valid value for ZF. This is necessary + * because some infrequent instructions may leave SF and ZF in an apparently + * inconsistent state (both set): sahf, popf and the few (not implemented) + * instructions that only affect ZF. 
+ */ +#define ZF_PROTECT 0x00080000 + +/* The parity is always evaluated when it is needed */ +#define PF 0 /* Only after EVAL_PF */ +#define EVAL_PF bl _eval_pf + +/* This field gives the shift amount to use to evaluate SF + and ZF when ZF_PROTECT is not set */ +#define RES_SIZE_MASK 0x00060000 +#define RESL 0x00000000 +#define RESW 0x00040000 +#define RESB 0x00060000 + +#define RES_SHIFT(dst) rlwinm dst,flags,18,0x18 + +/* SUBTRACTING is set if the last flag setting instruction was sub/sbb/cmp, + used to evaluate OF and AF */ +#define SUBTRACTING 0x00010000 + +#define GET_ADDSUB(dst) rlwinm dst,flags,16,0x01 + +/* rotate (rcl/rcr/rol/ror) affect CF and OF but not other flags */ +#define ROTATE_MASK (CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR|OF_STATE_MASK|SIGNED_IN_CR) +#define ROTATE_FLAGS rlwimi flags,one,24,ROTATE_MASK + +/* + * INCDEC_FIELD has at most one bit set when the last flag setting instruction + * was either inc or dec (which do not affect the carry). When one of these + * bits is set, it affects the way OF, SF, ZF, AF, and PF are evaluated. 
+ */ +#define INCDEC_FIELD 0x0000ff00 + +#define DECB_SHIFT 8 +#define INCB_SHIFT 9 +#define DECW_SHIFT 10 +#define INCW_SHIFT 11 +#define DECL_SHIFT 14 +#define INCL_SHIFT 15 + +#define INCDEC_MASK (OF_STATE_MASK|SIGNED_IN_CR|ABOVE_IN_CR|SF_IN_CR|\ + ZF_IN_CR|ZF_PROTECT|RES_SIZE_MASK|SUBTRACTING|\ + INCDEC_FIELD) +/* Operations to perform to tell where the flags are after inc or dec */ +#define INC_FLAGS(BWL) rlwimi flags,one,INC##BWL##_SHIFT,INCDEC_MASK +#define DEC_FLAGS(BWL) rlwimi flags,one,DEC##BWL##_SHIFT,INCDEC_MASK + +/* How the flags are set after arithmetic operations */ +#define FLAGS_ADD(BWL) (CF_ADD##BWL|OF_ARITH##BWL|RES##BWL) +#define FLAGS_SBB(BWL) (CF_SUB##BWL|OF_ARITH##BWL|RES##BWL|SUBTRACTING) +#define FLAGS_SUB(BWL) FLAGS_SBB(BWL)|CF_IN +#define FLAGS_CMP(BWL) FLAGS_SUB(BWL)|ZF_IN_CR|CF_IN_CR|SIGNED_IN_CR|ABOVE_IN_CR + +/* How the flags are set after logical operations */ +#define FLAGS_LOG(BWL) (CF_ZERO|OF_ZERO|RES##BWL) +#define FLAGS_TEST(BWL) FLAGS_LOG(BWL)|ZF_IN_CR|SIGNED_IN_CR|SF_IN_CR + +/* How the flags are set after bt/btc/btr/bts. */ +#define FLAGS_BTEST CF_IN_CR|CF_ADDL|OF_ZERO|RESL + +/* How the flags are set after bsf/bsr. */ +#define FLAGS_BSRCH(WL) CF_ZERO|OF_ZERO|RES##WL|ZF_IN_CR + +/* How the flags are set after logical right shifts */ +#define FLAGS_SHR(BWL) (CF_EXPLICIT|OF_ARITH##BWL|RES##BWL) + +/* How the flags are set after double length shifts */ +#define FLAGS_DBLSH(WL) (CF_EXPLICIT|OF_ARITH##WL|RES##WL) + +/* How the flags are set after multiplies */ +#define FLAGS_MUL (CF_EXPLICIT|OF_EXPLICIT) + +#define SET_FLAGS(fl) lis flags,(fl)>>16 +#define ADD_FLAGS(fl) addis flags,flags,(fl)>>16 + +/* + * We are always off by one when compared with Intel's eip, this shortens + * code by allowing to load next byte with lbzu x,1(eip). The register + * called eip actually contains csbase+eip, and thus should be called lip + * for linear ip. 
+ */ + +/* + * Reason codes passed to the C part of the emulator, this includes all + * instructions which may change the current code segment. These definitions + * will soon go into a separate include file. Codes 0 to 255 correspond + * directly to the interrupt/trap that has to be generated. + */ + +#define code_divide_err 0 +#define code_trap 1 +#define code_int3 3 +#define code_into 4 +#define code_bound 5 +#define code_ud 6 +#define code_dna 7 /* FPU not available */ + +#define code_iretw 256 /* Interrupt returns */ +#define code_iretl 257 +#define code_lcallw 258 /* Far calls and jumps */ +#define code_lcalll 259 +#define code_ljmpw 260 +#define code_ljmpl 261 +#define code_lretw 262 /* Far returns */ +#define code_lretl 263 +#define code_softint 264 /* int $xx */ +#define code_lock 265 /* Lock prefix */ +/* Codes 1024 to 2047 are used for I/O port access instructions: + - The three LSB define the port size (1, 2 or 4) + - bit of weight 512 means out if set, in if clear + - bit of weight 256 means ins/outs if set, in/out if clear + - bit of weight 128 means use 32 bit addresses if set, 16 bit if clear + (only used for ins/outs instructions, always clear for in/out) + */ +#define code_inb 1024+1 +#define code_inw 1024+2 +#define code_inl 1024+4 +#define code_outb 1024+512+1 +#define code_outw 1024+512+2 +#define code_outl 1024+512+4 +#define code_insb_a16 1024+256+1 +#define code_insw_a16 1024+256+2 +#define code_insl_a16 1024+256+4 +#define code_outsb_a16 1024+512+256+1 +#define code_outsw_a16 1024+512+256+2 +#define code_outsl_a16 1024+512+256+4 +#define code_insb_a32 1024+256+128+1 +#define code_insw_a32 1024+256+128+2 +#define code_insl_a32 1024+256+128+4 +#define code_outsb_a32 1024+512+256+128+1 +#define code_outsw_a32 1024+512+256+128+2 +#define code_outsl_a32 1024+512+256+128+4 + +#define state 31 +/* r31 (state) is a pointer to a structure describing the emulated x86 +processor, its layout is the following: + +first the general purpose registers, 
they are in little endian byte order + +offset name + + 0 eax/ax/al + 1 ah + 4 ecx/cx/cl + 5 ch + 8 edx/dx/dl + 9 dh + 12 ebx/bx/bl + 13 bh + 16 esp/sp + 20 ebp/bp + 24 esi/si + 28 edi/di +*/ + +#define AL 0 +#define AX 0 +#define EAX 0 +#define AH 1 +#define CL 4 +#define CX 4 +#define ECX 4 +#define DX 8 +#define EDX 8 +#define BX 12 +#define EBX 12 +#define SP 16 +#define ESP 16 +#define BP 20 +#define EBP 20 +#define SI 24 +#define ESI 24 +#define DI 28 +#define EDI 28 + +/* +then the rest of the machine state, big endian ! + +offset name + + 32 essel segment register selectors (values) + 36 cssel + 40 sssel + 44 dssel + 48 fssel + 52 gssel + 56 eipimg true eip (register named eip is csbase+eip) + 60 eflags eip and eflags only valid when C code running ! + 64 esbase segment registers bases + 68 csbase + 72 ssbase + 76 dsbase + 80 fsbase + 84 gsbase + 88 iobase For I/O instructions, I/O space virtual base + 92 ioperm I/O permission bitmap pointer + 96 reason Reason code when calling external emulator + 100 nexteip eip past instruction for external emulator + 104 parm1 parameter for external emulator + 108 parm2 parameter for external emulator + 112 _opcode current opcode register for external emulator + 116 _base segment register base for external emulator + 120 _offset instruction operand offset + More internal state was dumped here for debugging in first versions + + 128 vbase where the 1Mb memory is mapped + 132 cntimg instruction counter + 136 scratch + 192 eipstat array of 32k unsigned long pairs for eip stats +*/ + +#define essel 32 +#define cssel 36 +#define sssel 40 +#define dssel 44 +#define fssel 48 +#define gssel 52 +#define eipimg 56 +#define eflags 60 +#define esbase 64 +#define csbase 68 +#define ssbase 72 +#define dsbase 76 +#define fsbase 80 +#define gsbase 84 +#define iobase 88 +#define ioperm 92 +#define reason 96 +#define nexteip 100 +#define parm1 104 +#define parm2 108 +#define _opcode 112 +#define _base 116 +#define _offset 120 +#define 
vbase 128 +#define cntimg 132 +#ifdef EIP_STATS +#define eipstat 192 +#endif +/* Global registers */ + +/* Some segment register bases are permanently kept in registers since they +are often used: these are csb, esb and ssb because they are +required for jumps, string instructions, and pushes/pops/calls/rets. +dsbase is not kept in a register but loaded from memory to allow somewhat +more parallelism in the main emulation loop. +*/ + +#define one 30 /* Constant one, so pervasive */ +#define ssb 29 +#define csb 28 +#define esb 27 +#define eip 26 /* That one is indeed csbase+(e)ip-1 */ +#define result 25 /* For the use of result, op1, op2 */ +#define op1 24 /* see the section on flag emulation */ +#define op2 23 +#define opbase 22 /* default opcode table */ +#define flags 21 /* See earlier description */ +#define opcode 20 /* Opcode */ +#define opreg 19 /* Opcode extension/register number */ +/* base is reloaded with the base of the ds segment at the beginning of +every instruction, it is modified by segment override prefixes, when +the default base segment is ss, or when the modrm byte specifies a +register operand */ +#define base 18 /* Instruction's operand segment base */ +#define offset 17 /* Instruction's memory operand offset */ +/* used to address a table telling how to decode the addressing mode +specified by the modrm byte */ +#define adbase 16 /* addressing mode table */ +/* Following registers are used only as dedicated temporaries during decoding, +they are free for use during emulation */ +/* + * ceip (current eip) is only in use when we call the external emulator for + * instructions that fault. Note that it is forbidden to change flags before + * the check for the fault happens (divide by zero...) ! ceip is also used + * when measuring timing. + */ +#define ceip 15 + +/* A register used to measure timing information (when enabled) */ +#ifdef EIP_STATS +#define tstamp 14 +#endif + +#define count 12 /* Instruction counter. 
*/ + +#define r0 0 +#define r1 1 /* PPC Stack pointer. */ +#define r3 3 +#define r4 4 +#define r5 5 +#define r6 6 +#define r7 7 + +/* Macros to read code stream */ +#define NEXTBYTE(dest) lbzu dest,1(eip) +#define NEXTWORD(dest) lhbrx dest,eip,one; la eip,2(eip) +#define NEXTDWORD(dest) lwbrx dest,eip,one; la eip,4(eip) +#define NEXT b nop +#define GOTNEXT b gotopcode + +#ifdef __BOOT__ + START_GOT + GOT_ENTRY(_jtables) + GOT_ENTRY(jtab_www) + GOT_ENTRY(adtable) + END_GOT +#else + .text +#endif + .align 2 + .global em86_enter + .type em86_enter,@function +em86_enter: stwu r1,-96(r1) # allocate stack + mflr r0 + stmw 14,24(r1) + mfcr r4 + stw r0,100(r1) + mr state,r3 + stw r4,20(r1) +#ifdef __BOOT__ +/* We need this since r30 is the default GOT pointer */ +#define r30 30 + GET_GOT +/* The relocation of these tables is explicit, this could be done + * automatically with fixups but would add more than 8kb in the fixup tables. + */ + lwz r3,GOT(_jtables) + lwz r4,_endjtables-_jtables(r3) + sub. 
r4,r3,r4 + beq+ 1f + li r0,((_endjtables-_jtables)>>2)+1 + addi r3,r3,-4 + mtctr r0 +0: lwzu r5,4(r3) + add r5,r5,r4 + stw r5,0(r3) + bdnz 0b +1: lwz adbase,GOT(adtable) + lwz opbase,GOT(jtab_www) +/* Now r30 is only used as constant 1 */ +#undef r30 + li one,1 # pervasive constant +#else + lis opbase,jtab_www@ha + lis adbase,adtable@ha + li one,1 # pervasive constant + addi opbase,opbase,jtab_www@l + addi adbase,adbase,adtable@l +#ifdef EIP_STATS + li ceip,0 + mftb tstamp +#endif +#endif +/* We branch back here when calling an external function tells us to resume */ +restart: lwz r3,eflags(state) + lis flags,(OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR)>>16 + lwz csb,csbase(state) + extsb result,r3 # SF/PF + rlwinm op1,r3,31,0x08 # AF + lwz eip,eipimg(state) + ZF862ZF(r3) # cr6 + addi op2,op1,0 # AF + lwz ssb,ssbase(state) + rlwimi flags,r3,15,OF_VALUE # OF + rlwimi r3,r3,32+RF86-RF,RF,RF # RF + lwz esb,esbase(state) + ori result,result,0xfb # PF + mtcrf 0x06,r3 # RF/DF/IF/TF/SF/ZF + lbzux opcode,eip,csb + rlwimi flags,r3,27,CF_VALUE # CF + xori result,result,0xff # PF + lwz count,cntimg(state) + GOTNEXT # start the emulator + +/* Now return */ +exit: lwz r0,100(r1) + lwz r4,20(r1) + mtlr r0 + lmw 14,24(r1) + mtcr r4 + addi r1,r1,96 + blr + +trap: crmove 0,RF + crclr RF + bt- 0,resume + sub ceip,eip,csb + li r3,code_trap +complex: addi eip,eip,1 + stw r3,reason(state) + sub eip,eip,csb + stw op1,240(state) + stw op2,244(state) + stw result,248(state) + stw flags,252(state) + stw r4,parm1(state) + stw r5,parm2(state) + stw opcode,_opcode(state) + bl _eval_flags + stw base,_base(state) + stw eip,nexteip(state) + stw r3,eflags(state) + mr r3,state + stw offset,_offset(state) + stw ceip,eipimg(state) + stw count,cntimg(state) + bl em86_trap + cmpwi r3,0 + bne exit + b restart + +/* Main loop */ +/* + * The two LSB of each entry in the main table mean the following: + * 00: indirect opcode: modrm follows and the three middle bits are an + * opcode extension. 
The entry points to another jump table. + * 01: direct instruction, branch directly to the routine. + * 10: modrm specifies byte size memory and register operands. + * 11: modrm specifies word/long memory and register operands. + * + * The modrm byte, if present, is always loaded in r7. + * + * Note: most "mr x,y" instructions have been replaced by "addi x,y,0" since + * the latter can be executed in the second integer unit on 603e. + */ + +/* + * This code is very good example of absolutely unmaintainable code. + * It was actually much easier to write than it is to understand ! + * If my computations are right, the maximum path length from fetching + * the opcode to exiting to the actual instruction execution is + * 46 instructions (for non-prefixed, single byte opcode instructions). + * + */ + .align 5 +#ifdef EIP_STATS +nop: NEXTBYTE(opcode) +gotopcode: slwi r3,opcode,2 + bt- TF,trap +resume: lwzx r4,opbase,r3 + addi r5,state,eipstat+4 + clrlslwi r6,ceip,17,3 + mtctr r4 + lwzux r7,r5,r6 + slwi. r0,r4,30 # two lsb of table entry + sub r7,r7,tstamp + lwz r6,-4(r5) + mftb tstamp + addi r6,r6,1 + sub ceip,eip,csb + stw r6,-4(r5) + add r7,r7,tstamp + lwz base,dsbase(state) + stw r7,0(r5) +#else +nop: NEXTBYTE(opcode) +gotopcode: slwi r3,opcode,2 + bt- TF,trap +resume: lwzx r4,opbase,r3 + sub ceip,eip,csb + mtctr r4 + slwi. 
r0,r4,30 # two lsb of table entry + lwz base,dsbase(state) + addi count,count,1 +#endif + bgtctr- # for instructions without modrm + +/* modrm byte present */ + NEXTBYTE(r7) # modrm byte + cmplwi cr1,r7,192 + rlwinm opreg,r7,31,0x1c + beq- cr0,8f # extended opcode +/* modrm with middle 3 bits specifying a register (non prefixed) */ + rlwinm r0,r4,3,0x8 + li r4,0x1c0d + rlwimi opreg,r7,27,0x01 + srw r4,r4,r0 + and opreg,opreg,r4 + blt cr1,9f +/* modrm with 2 register operands */ +1: rlwinm offset,r7,2,0x1c + addi base,state,0 + rlwimi offset,r7,30,0x01 + and offset,offset,r4 + bctr + +/* Prefixes: first segment overrides */ + .align 4 +_es: NEXTBYTE(r7); addi base,esb,0 + oris opcode,opcode,0x8000; b 2f +_cs: NEXTBYTE(r7); addi base,csb,0 + oris opcode,opcode,0x8000; b 2f +_fs: NEXTBYTE(r7); lwz base,fsbase(state) + oris opcode,opcode,0x8000; b 2f +_gs: NEXTBYTE(r7); lwz base,gsbase(state) + oris opcode,opcode,0x8000; b 2f +_ss: NEXTBYTE(r7); addi base,ssb,0 + oris opcode,opcode,0x8000; b 2f +_ds: NEXTBYTE(r7) + oris opcode,opcode,0x8000; b 2f + +/* Lock (unimplemented) and repeat prefixes */ +_lock: li r3,code_lock; b complex +_repnz: NEXTBYTE(r7); rlwimi opcode,one,12,0x1800; b 2f +_repz: NEXTBYTE(r7); rlwimi opcode,one,11,0x1800; b 2f + +/* Operand and address size prefixes */ + .align 4 +_opsize: NEXTBYTE(r7); ori opcode,opcode,0x200 + rlwinm r3,opcode,2,0x1ffc; b 2f +_adsize: NEXTBYTE(r7); ori opcode,opcode,0x400 + rlwinm r3,opcode,2,0x1ffc; b 2f + +_twobytes: NEXTBYTE(r7); addi r3,r3,0x400 +2: rlwimi r3,r7,2,0x3fc + lwzx r4,opbase,r3 + rlwimi opcode,r7,0,0xff + mtctr r4 + slwi. 
r0,r4,30 + bgtctr- # direct instruction +/* modrm byte in a prefixed instruction */ + NEXTBYTE(r7) # modrm byte + cmpwi cr1,r7,192 + rlwinm opreg,r7,31,0x1c + beq- 6f +/* modrm with middle 3 bits specifying a register (prefixed) */ + rlwinm r0,r4,3,0x8 + li r4,0x1c0d + rlwimi opreg,r7,27,0x01 + srw r4,r4,r0 + and opreg,opreg,r4 + bnl cr1,1b # 2 register operands +/* modrm specifying memory with prefix */ +3: rlwinm r3,r3,27,0xff80 + rlwimi adbase,r7,2,0x1c + extsh r3,r3 + rlwimi r3,r7,31,0x60 + lwzx r4,r3,adbase + cmpwi cr1,r4,0x3090 + bnl+ cr1,10f +/* displacement only addressing modes */ +4: cmpwi r4,0x2000 + bne 5f + NEXTWORD(offset) + bctr +5: NEXTDWORD(offset) + bctr +/* modrm with opcode extension (prefixed) */ +6: lwzx r4,r4,opreg + mtctr r4 + blt cr1,3b +/* modrm with opcode extension and register operand */ +7: rlwinm offset,r7,2,0x1c + addi base,state,0 + rlwinm r0,r4,3,0x8 + li r4,0x1c0d + rlwimi offset,r7,30,0x01 + srw r4,r4,r0 + and offset,offset,r4 + bctr +/* modrm with opcode extension (non prefixed) */ +8: lwzx r4,r4,opreg + mtctr r4 +/* FIXME ? We continue fetching even if the opcode extension is undefined. + * It shouldn't do any harm on real mode emulation anyway, and for ROM + * BIOS emulation, we are supposed to read valid code. + */ + bnl cr1,7b +/* modrm specifying memory without prefix */ +9: rlwimi adbase,r7,2,0x1c # memory addressing mode computation + rlwinm r3,r7,31,0x60 + lwzx r4,r3,adbase + cmplwi cr1,r4,0x3090 + blt- cr1,4b # displacement only addressing mode +10: rlwinm. 
r0,r7,24,0,1 # three cases distinguished + beq- cr1,15f # an sib follows + rlwinm r3,r4,30,0x1c # 16bit/32bit/%si index/%di index + cmpwi cr1,r3,8 # set cr1 as early as possible +/* NOTE(review): with the kludge field values documented in the header comment (0x10 for 32 bit modes) r3 can only be 0, 4, 24 or 28 here, so this compare against 8 would never be equal; confirm against adtable. */ + rlwinm r6,r4,26,0x1c # base register + lwbrx offset,state,r6 # load the base register + beq cr0,14f # no displacement + cmpw cr2,r4,opcode # check for ss as default base + bgt cr0,12f # byte offset + beq cr1,11f # 32 bit displacement + NEXTWORD(r5) # 16 bit displacement + bgt cr1,13f # d16(base,index) +/* d16(base) */ + add offset,offset,r5 + clrlwi offset,offset,16 + bgtctr cr2 + addi base,ssb,0 + bctr +/* d32(base) */ +11: NEXTDWORD(r5) + add offset,offset,r5 + bgtctr cr2 + addi base,ssb,0 + bctr +/* 8 bit displacement */ +12: NEXTBYTE(r5) + extsb r5,r5 + bgt cr1,13f +/* d8(base) */ + extsb r6,r4 + add offset,offset,r5 + ori r6,r6,0xffff + and offset,offset,r6 + bgtctr cr2 + addi base,ssb,0 + bctr +/* d8(base,index) and d16(base,index) share this code ! */ +13: lhbrx r3,state,r3 + add offset,offset,r5 + add offset,offset,r3 + clrlwi offset,offset,16 + bgtctr cr2 + addi base,ssb,0 + bctr +/* no displacement: only indexed modes may use ss as default base */ +14: beqctr cr1 # 32 bit register indirect + clrlwi offset,offset,16 + bltctr cr1 # 16 bit register indirect +/* (base,index) */ + lhbrx r3,state,r3 # 16 bit [{bp,bx}+{si,di}] + cmpw cr2,r4,opcode # check for ss as default base + add offset,offset,r3 +/* The sum of two 16 bit registers must wrap to 16 bits, exactly as in the d8/d16(base,index) case above. The clear count is an immediate: writing r3 here expanded to the constant 3 and left bits 16..28 of the sum in the offset. */ + clrlwi offset,offset,16 + bgtctr+ cr2 + addi base,ssb,0 + bctr +/* sib modes, note that the size of the offset can be known from cr0 */ +15: NEXTBYTE(r7) # get sib + rlwinm r3,r7,31,0x1c # index + rlwinm offset,r7,2,0x1c # base + cmpwi cr1,r3,ESP # has index ? + bne cr0,18f # base+d8/d32 + cmpwi offset,EBP + beq 17f # d32(,index,scale) + xori r4,one,0xcc01 # build 0x0000cc00 + rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000 + lwbrx offset,state,offset + cmpw cr2,r4,opcode # use ss ? 
+ beq- cr1,16f # no index
+/* (base,index,scale) */
+ lwbrx r3,state,r3
+ srwi r6,r7,6
+ slw r3,r3,r6
+ add offset,offset,r3
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* (base), in practice only (%esp) is coded this way */
+16: bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* d32(,index,scale) */
+17: NEXTDWORD(offset)
+ beqctr- cr1 # no index: very unlikely
+ lwbrx r3,state,r3
+ srwi r6,r7,6
+ slw r3,r3,r6
+ add offset,offset,r3
+ bctr
+/* 8 or 32 bit displacement */
+18: xori r4,one,0xcc01 # build 0x0000cc00
+ rlwnm r4,r4,offset,0,1 # 0 or 0xc0000000
+ lwbrx offset,state,offset
+ cmpw cr2,r4,opcode # use ss ?
+ bgt cr0,20f # 8 bit offset
+/* 32 bit displacement */
+ NEXTDWORD(r5)
+ beq- cr1,21f
+/*
+ * d(base,index,scale): the index must be shifted by the scale held in
+ * the two top bits of the sib byte (still in r7), exactly as in the
+ * two indexed paths above.
+ */
+19: lwbrx r3,state,r3
+ srwi r6,r7,6 # scale
+ slw r3,r3,r6 # index<<scale
+ add offset,offset,r5
+ add offset,offset,r3
+ bgtctr cr2
+ addi base,ssb,0
+ bctr
+/* 8 bit displacement */
+20: NEXTBYTE(r5)
+ extsb r5,r5
+ bne+ cr1,19b
+/* d(base), in practice base is %esp */
+21: add offset,offset,r5
+ bgtctr- cr2
+ addi base,ssb,0
+ bctr
+
+/*
+ * Flag evaluation subroutines: they have not been written for performance
+ * since they are not often used in practice. The rule of the game was to
+ * write them with as few branches as possible.
+ * The first routines evaluate either one or 2 (ZF and SF simultaneously)
+ * flags and do not use r0 and r7.
+ * The more complex routines (_eval_above, _eval_signed and _eval_flags)
+ * call the former ones, using r0 as a return address save register and
+ * r7 as a safe temporary.
+ */
+
+/*
+ * _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
+ * and protected because it is possible, although it is exceptional, to have
+ * SF and ZF set at the same time after a few instructions which may leave the
+ * flags in this apparently inconsistent state: sahf, popf, iret and the few
+ * (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
+ * cmpxchg8b). This also solves the obscure case of ZF set and PF clear. 
+ * On return: SF=cr6[0], ZF=cr6[2].
+ * Clobbers r3, r4 and r5; updates the *_IN_CR bits of flags.
+ */
+
+_eval_sf_zf: andis. r5,flags,ZF_PROTECT>>16
+ rlwinm r3,flags,0,INCDEC_FIELD
+ RES_SHIFT(r4)
+ cntlzw r3,r3
+ slw r4,result,r4
+ srwi r5,r3,5 # ? use result : use op2
+ rlwinm r3,r3,2,0x18
+ oris flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16
+ neg r5,r5 # mux result/op2
+ slw r3,op2,r3
+ and r4,r4,r5
+ andc r3,r3,r5
+ xoris flags,flags,(SIGNED_IN_CR)>>16
+ bne- 1f # 12 instructions between set
+ or r3,r3,r4 # and test, good for folding
+ cmpwi cr6,r3,0
+ blr
+/* ZF is protected: only SF is copied from the record form result */
+1: or. r3,r3,r4
+ crmove SF,0
+ blr
+
+/*
+ * _eval_cf may be called at any time, no other flag is affected.
+ * On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8.
+ * Clobbers r4 and r5.
+ */
+_eval_cf: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # get 8 or 16 bit carry
+ subfe r3,result,op1 # generate PPC carry for
+ CF_ROTCNT(r5) # preceding operation
+ addze r3,r4 # put carry into LSB
+ CF_POL(r4,23) # polarity & 0x100
+ oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16
+ rlwnm r3,r3,r5,23,23 # shift carry there
+ xor r3,r3,r4 # CF <<8
+ xoris flags,flags,(ABOVE_IN_CR)>>16
+ cmplw cr4,one,r3 # sets cr4[0]
+ blr
+
+/*
+ * eval_of returns the overflow flag in OF_STATE field, which will be
+ * either 001 (OF clear) or 101 (OF set), it is only called when the two
+ * low order bits of OF_STATE are not 01 (otherwise it will work but
+ * it is an elaborate variant of a nop with a few registers destroyed)
+ * The code multiplexes several sources in a branchless way, was fun to write. 
+ * Clobbers r3, r4, r5 and r6.
+ */
+_eval_of: GET_ADDSUB(r4) # 0(add)/1(sub)
+ rlwinm r3,flags,0,INCDEC_FIELD
+ neg r4,r4 # 0(add)/-1(sub)
+ eqv r5,result,op1 # result[]==op1[] (bit by bit)
+ cntlzw r3,r3 # inc/dec
+ xor r4,r4,op2 # true sign of op2
+ oris r5,r5,0x0808 # bits to clear
+ clrlwi r6,r3,31 # 0(inc)/1(dec)
+ eqv r4,r4,op1 # op1[]==op2[] (bit by bit)
+ add r6,op2,r6 # add 1 if dec
+ rlwinm r3,r3,2,0x18 # incdec_shift
+ andc r4,r4,r5 # arithmetic overflow
+ slw r3,r6,r3 # shifted inc/dec result
+ addis r3,r3,0x8000 # compare with 0x80000000
+ ori r4,r4,0x0808 # bits to set
+ cntlzw r3,r3 # 32 if inc/dec overflow
+ OF_ROTCNT(r6)
+ rlwimi r4,r3,18,0x00800000 # insert inc/dec overflow
+ rlwimi flags,one,24,OF_STATE_MASK
+ rlwnm r3,r4,r6,8,8 # get field
+ rlwimi flags,r3,3,OF_VALUE # insert OF
+ blr
+
+/*
+ * _eval_pf will always be called when needed (complex but infrequent),
+ * there are a few quirks for a branchless solution.
+ * The parity of the low byte is looked up by using its folded 4 bit
+ * value as a rotate count on the constant 0x96690000.
+ * On return: PF=cr0[0], PF=MSB(r3)
+ */
+_eval_pf: rlwinm r3,flags,0,INCDEC_FIELD
+ rotrwi r4,op2,4 # from inc/dec
+ rotrwi r5,result,4 # from result
+ cntlzw r3,r3 # use result if 32
+ xor r4,r4,op2
+ xor r5,r5,result
+ rlwinm r3,r3,26,0,0 # 32 becomes 0x80000000
+ clrlwi r4,r4,28
+ lis r6,0x9669 # constant to shift
+ clrlwi r5,r5,28
+ rlwnm r4,r6,r4,0,0 # parity from inc/dec
+ rlwnm r5,r6,r5,0,0 # parity from result
+ andc r4,r4,r3 # select which one
+ and r5,r5,r3
+ add. r3,r4,r5 # and test to simplify
+ blr # returns in r3 and cr0 set.
+
+/*
+ * _eval_af will always be called when needed (complex but infrequent):
+ * - if after inc, af is set when 4 low order bits of op2 are 0
+ * - if after dec, af is set when 4 low order bits of op2 are 1
+ * (or 0 after adding 1 as implemented here)
+ * - if after add/sub/adc/sbb/cmp af is set from sum of 4 LSB of op1
+ * and 4 LSB of op2 (possibly complemented) plus carry in.
+ * - other instructions leave AF undefined so the returned value is irrelevant. 
+ * Returned value must be masked with 0x10, since all other bits are undefined.
+ * The branchless code is perhaps not the most efficient, but quite parallel.
+ * Result is returned in r3; r4, r5 and r6 are clobbered.
+ */
+_eval_af: rlwinm r3,flags,0,INCDEC_FIELD
+ clrlwi r5,op2,28 # 4 LSB of op2
+ addc r4,flags,flags # carry_in
+ GET_ADDSUB(r6)
+ cntlzw r3,r3 # if inc/dec 16..23 else 32
+ neg r6,r6 # add/sub
+ clrlwi r4,r3,31 # if dec 1 else 0
+ xor r5,r5,r6 # conditionally complement
+ clrlwi r6,op1,28 # 4 LSB of op1
+ add r4,op2,r4 # op2+(dec ? 1 : 0)
+ clrlwi r4,r4,28 # 4 LSB of op2+(dec ? 1 : 0)
+ adde r5,r6,r5 # op1+cy_in+(op2/~op2)
+ cntlzw r4,r4 # 28..31 if not AF, 32 if set
+ andc r5,r5,r3 # masked AF from add/sub...
+ andc r4,r3,r4 # masked AF from inc/dec
+ or r3,r4,r5
+ blr
+
+/*
+ * _eval_above will only be called if ABOVE_IN_CR is not set.
+ * On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1]
+ * The link register is saved in r0 around the nested calls.
+ */
+_eval_above: andis. r3,flags,ZF_IN_CR>>16
+ mflr r0
+ beql+ _eval_sf_zf
+ andis. r3,flags,CF_IN_CR>>16
+ beql+ _eval_cf
+ mtlr r0
+ oris flags,flags,ABOVE_IN_CR>>16
+ crnor ABOVE,ZF,CF
+ blr
+
+/* _eval_signed may only be called when signed_in_cr is clear ! */
+_eval_signed: andis. r3,flags,SF_IN_CR>>16
+ mflr r0
+ beql+ _eval_sf_zf
+# SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear
+ rlwinm. r3,flags,5,0,1
+ xoris flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16
+ bngl+ _eval_of
+ andis. r3,flags,OF_VALUE>>16
+ mtlr r0
+ crxor SLT,SF,OF
+ crnor SGT,SLT,ZF
+ blr
+
+/*
+ * _eval_flags collects all the arithmetic flags in r7 (which starts as 2)
+ * by calling the routines above, then returns in r3 the bits of the
+ * memory eflags image selected by the mask 0x47000 merged with r7.
+ * The link register is saved in r0 around the nested calls.
+ */
+_eval_flags: mflr r0
+ bl _eval_cf
+ li r7,2
+ rlwimi r7,r3,24,CF86,CF86 # 2 if CF clear, 3 if set
+ bl _eval_pf
+ andis. r4,flags,SF_IN_CR>>16
+ rlwimi r7,r3,32+PF-PF86,PF86,PF86
+ bl _eval_af
+ rlwimi r7,r3,0,AF86,AF86
+ beql+ _eval_sf_zf
+ mfcr r3
+ rlwinm. r4,flags,5,0,1
+ rlwimi r7,r3,0,DF86,SF86
+ ZF2ZF86(r3,r7)
+ bngl+ _eval_of
+ mtlr r0
+ lis r4,0x0004
+ lwz r3,eflags(state)
+ addi r4,r4,0x7000
+ rlwimi r7,flags,17,OF86,OF86
+ and r3,r3,r4
+ or r3,r3,r7
+ blr
+
+/* Quite simple for real mode, input in r4, returns in r3. 
*/
+/* The result is vbase(state) + 16*selector; r5 is clobbered. */
+_segment_load: lwz r5,vbase(state)
+ rlwinm r3,r4,4,0xffff0 # segment selector * 16
+ add r3,r3,r5
+ blr
+
+/* To allow I/O port virtualization if necessary, code for exception in r3,
+port number in r4.
+The table at ioperm(state) is read as one bit per port: if any bit covering
+the accessed ports is set, control goes to complex instead of returning.
+r0, r5 and r6 are clobbered, r3 and r4 are preserved. */
+_check_port: lwz r5,ioperm(state)
+ rlwinm r6,r4,29,0x1fff # 0 to 8kB
+ lis r0,0xffff
+ lhbrx r5,r5,r6
+ clrlwi r6,r4,29 # modulo 8
+ rlwnm r0,r0,r3,0x0f # 1, 3, or 0xf
+ slw r0,r0,r6
+ and. r0,r0,r5
+ bne- complex
+ blr
+/*
+ * Instructions are in approximate functional order:
+ * 1) move, exchange, lea, push/pop, pusha/popa
+ * 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out
+ * 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg
+ * 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr
+ * 5) jump, call, ret
+ * 6) string instructions and xlat
+ * 7) rotate/shift/mul/div
+ * 8) segment register, far jumps, calls and rets, interrupts
+ * 9) miscellaneous (flags, bcd,...)
+ */
+
+/* Operand shorthands: index register first, base register second */
+#define MEM offset,base
+#define REG opreg,state
+#define SELECTORS 32
+#define SELBASES 64
+
+/* Immediate moves */
+movb_imm_reg: rlwinm opreg,opcode,2,28,29; lbz r3,1(eip)
+ rlwimi opreg,opcode,30,31,31; lbzu opcode,2(eip)
+ stbx r3,REG; GOTNEXT
+
+movw_imm_reg: lhz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,3(eip)
+ sthx r3,REG; GOTNEXT
+
+movl_imm_reg: lwz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,5(eip)
+ stwx r3,REG; GOTNEXT
+
+/* The memory forms go to ud unless opreg is 0 */
+movb_imm_mem: lbz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,2(eip); bne- ud
+ stbx r0,MEM; GOTNEXT
+
+movw_imm_mem: lhz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,3(eip); bne- ud
+ sthx r0,MEM; GOTNEXT
+
+movl_imm_mem: lwz r0,1(eip); cmpwi opreg,0
+ lbzu opcode,5(eip); bne- ud
+ stwx r0,MEM; GOTNEXT
+
+/* The special short form moves between memory and al/ax/eax */
+movb_al_a32: lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip)
+ stbx r0,MEM; GOTNEXT
+
+movb_al_a16: lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip)
+ stbx r0,MEM; GOTNEXT
+
+movw_ax_a32: lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip)
+ sthx 
r0,MEM; GOTNEXT
+
+movw_ax_a16: lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip)
+ sthx r0,MEM; GOTNEXT
+
+movl_eax_a32: lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip)
+ stwx r0,MEM; GOTNEXT
+
+movl_eax_a16: lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip)
+ stwx r0,MEM; GOTNEXT
+
+movb_a32_al: lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM
+ stb r0,AL(state); GOTNEXT
+
+movb_a16_al: lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM
+ stb r0,AL(state); GOTNEXT
+
+movw_a32_ax: lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM
+ sth r0,AX(state); GOTNEXT
+
+movw_a16_ax: lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM
+ sth r0,AX(state); GOTNEXT
+
+movl_a32_eax: lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM
+ stw r0,EAX(state); GOTNEXT
+
+movl_a16_eax: lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM
+ stw r0,EAX(state); GOTNEXT
+
+/* General purpose move (all are exactly 4 instructions long) */
+/* Data is copied with plain loads and stores, so its byte order is kept */
+ .align 4
+movb_reg_mem: lbzx r0,REG
+ NEXTBYTE(opcode)
+ stbx r0,MEM
+ GOTNEXT
+
+movw_reg_mem: lhzx r0,REG
+ NEXTBYTE(opcode)
+ sthx r0,MEM
+ GOTNEXT
+
+movl_reg_mem: lwzx r0,REG
+ NEXTBYTE(opcode)
+ stwx r0,MEM
+ GOTNEXT
+
+movb_mem_reg: lbzx r0,MEM
+ NEXTBYTE(opcode)
+ stbx r0,REG
+ GOTNEXT
+
+movw_mem_reg: lhzx r0,MEM
+ NEXTBYTE(opcode)
+ sthx r0,REG
+ GOTNEXT
+
+movl_mem_reg: lwzx r0,MEM
+ NEXTBYTE(opcode)
+ stwx r0,REG
+ GOTNEXT
+
+/* short form exchange ax/eax with register */
+/* the register number is taken from the 3 low order bits of the opcode */
+xchgw_ax_reg: clrlslwi opreg,opcode,29,2
+ lhz r3,AX(state)
+ lhzx r4,REG
+ sthx r3,REG
+ sth r4,AX(state)
+ NEXT
+
+xchgl_eax_reg: clrlslwi opreg,opcode,29,2
+ lwz r3,EAX(state)
+ lwzx r4,REG
+ stwx r3,REG
+ stw r4,EAX(state)
+ NEXT
+
+/* General exchange (unlocked!) 
*/
+xchgb_reg_mem: lbzx r3,MEM
+ lbzx r4,REG
+ NEXTBYTE(opcode)
+ stbx r3,REG
+ stbx r4,MEM
+ GOTNEXT
+
+xchgw_reg_mem: lhzx r3,MEM
+ lhzx r4,REG
+ sthx r3,REG
+ sthx r4,MEM
+ NEXT
+
+xchgl_reg_mem: lwzx r3,MEM
+ lwzx r4,REG
+ stwx r3,REG
+ stwx r4,MEM
+ NEXT
+
+/* lea, one of the simplest instructions */
+/* base equal to state means the operand is a register: goes to ud */
+leaw: cmpw base,state
+ beq- ud
+ sthbrx offset,REG
+ NEXT
+
+leal: cmpw base,state
+ beq- ud
+ stwbrx offset,REG
+ NEXT
+
+/* Short form pushes and pops */
+/* Only the 16 low order bits of the stack pointer are read and updated */
+pushw_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lhzx r0,REG
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ sthx r0,ssb,r4
+ NEXT
+
+pushl_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lwzx r0,REG
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ stwx r0,ssb,r4
+ NEXT
+
+popw_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lhzx r0,ssb,r4
+ addi r4,r4,2 # order is important in case of pop sp
+ sthbrx r4,state,r3
+ sthx r0,REG
+ NEXT
+
+popl_sp_reg: li r3,SP
+ lhbrx r4,state,r3
+ clrlslwi opreg,opcode,29,2
+ lwzx r0,ssb,r4
+ addi r4,r4,4
+ sthbrx r4,state,r3
+ stwx r0,REG
+ NEXT
+
+/* Push immediate */
+/* The full size immediates are copied as is, bytes already in order */
+pushw_sp_imm: li r3,SP
+ lhbrx r4,state,r3
+ lhz r0,1(eip)
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,3(eip)
+ sthx r0,ssb,r4
+ GOTNEXT
+
+pushl_sp_imm: li r3,SP
+ lhbrx r4,state,r3
+ lwz r0,1(eip)
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,5(eip)
+ stwx r0,ssb,r4
+ GOTNEXT
+
+/*
+ * Push of a sign extended 8 bit immediate: only the byte at 1(eip) is
+ * the operand, so it is loaded alone, sign extended, and the resulting
+ * value is stored byte reversed like any other computed value.
+ */
+pushw_sp_imm8: li r3,SP
+ lhbrx r4,state,r3
+ lbz r0,1(eip)
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,2(eip)
+ extsb r0,r0
+ sthbrx r0,ssb,r4
+ GOTNEXT
+
+pushl_sp_imm8: li r3,SP
+ lhbrx r4,state,r3
+ lbz r0,1(eip)
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ lbzu opcode,2(eip)
+ extsb r0,r0
+ stwbrx r0,ssb,r4
+ GOTNEXT
+
+/* General push/pop */
+pushw_sp: lhbrx r0,MEM
+ li r3,SP
+ lhbrx r4,state,r3
+ addi r4,r4,-2
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ sthbrx 
r0,r4,ssb
+ NEXT
+
+pushl_sp: lwbrx r0,MEM
+ li r3,SP
+ lhbrx r4,state,r3
+ addi r4,r4,-4
+ sthbrx r4,state,r3
+ clrlwi r4,r4,16
+ stwbrx r0,r4,ssb
+ NEXT
+
+/* pop is an exception with 32 bit addressing modes, it is possible
+to calculate wrongly the address when esp is used as base. But 16 bit
+addressing modes are safe */
+
+/* opreg is the opcode extension and must be 0: compared as an immediate */
+popw_sp_a16: cmpwi cr1,opreg,0 # first check the opcode
+ li r3,SP
+ lhbrx r4,state,r3
+ bne- cr1,ud
+ lhzx r0,ssb,r4
+ addi r4,r4,2
+ sthx r0,MEM
+ sthbrx r4,state,r3
+ NEXT
+
+popl_sp_a16: cmpwi cr1,opreg,0
+ li r3,SP
+ lhbrx r4,state,r3
+ bne- cr1,ud
+ lwzx r0,ssb,r4
+ addi r4,r4,4 # 4 bytes were popped
+ stwx r0,MEM
+ sthbrx r4,state,r3
+ NEXT
+
+/* 32 bit addressing modes for pop not implemented for now. */
+ .equ popw_sp_a32,unimpl
+ .equ popl_sp_a32,unimpl
+
+/* pusha/popa */
+/* The 8 registers are the 4 byte slots at 0 to 28 from state, pushed in
+increasing order; sp is only written back at the end so the value pushed
+for sp is the one before the instruction. */
+pushaw_sp: li r3,SP
+ li r0,8
+ lhbrx r4,r3,state
+ mtctr r0
+ addi r5,state,-4
+1: addi r4,r4,-2
+ lhzu r6,4(r5)
+ clrlwi r4,r4,16
+ sthx r6,ssb,r4
+ bdnz 1b
+ sthbrx r4,r3,state # new sp
+ NEXT
+
+pushal_sp: li r3,SP
+ li r0,8
+ lhbrx r4,r3,state
+ mtctr r0
+ addi r5,state,-4
+1: addi r4,r4,-4
+ lwzu r6,4(r5)
+ clrlwi r4,r4,16
+ stwx r6,ssb,r4
+ bdnz 1b
+ sthbrx r4,r3,state # new sp
+ NEXT
+
+/* The value popped into the sp slot is overwritten by the final store */
+popaw_sp: li r3,SP
+ li r0,8
+ lhbrx r4,state,r3
+ mtctr r0
+ addi r5,state,32
+1: lhzx r6,ssb,r4
+ addi r4,r4,2
+ sthu r6,-4(r5)
+ clrlwi r4,r4,16
+ bdnz 1b
+ sthbrx r4,r3,state # updated sp
+ NEXT
+
+/*
+ * popal must not write the esp slot. The mask in r0 is shifted left once
+ * per register popped: negative means pop the next register, positive
+ * means skip one register slot and one stack word first, zero means done.
+ * 0xee00 gives 3 pops (slots 28, 24, 20), the skip of slot 16, then 4
+ * pops (slots 12 to 0): 7 registers stored and sp advanced by 32.
+ */
+popal_sp: li r3,SP
+ lis r0,0xee00 # mask to skip esp
+ lhbrx r4,state,r3
+ addi r5,state,32
+1: add. r0,r0,r0
+ lwzx r6,ssb,r4
+ addi r4,r4,4
+ stwu r6,-4(r5)
+ clrlwi r4,r4,16
+ blt 1b
+ addi r5,r5,-4 # skip the esp slot (harmless on exit)
+ beq 2f
+ addi r4,r4,4 # skip the stacked esp
+ clrlwi r4,r4,16
+ b 1b
+2: sthbrx r4,state,r3 # updated sp
+ NEXT
+
+/* Moves with zero or sign extension: first the special cases */
+cbw: lbz r3,AL(state)
+ extsb r3,r3
+ sthbrx r3,AX,state
+ NEXT
+
+cwde: lhbrx r3,AX,state
+ extsh r3,r3
+ stwbrx r3,EAX,state
+ NEXT
+
+cwd: lbz r3,AH(state)
+ extsb r3,r3
+ srwi r3,r3,8 # get sign bits
+ sth r3,DX(state)
+ NEXT
+
+cdq: lwbrx r3,EAX,state
+ srawi r3,r3,31
+ stw r3,EDX(state) # byte order unimportant !
+ NEXT
+
+/* Moves with zero or sign extension are special since the source
+and destination are not the same size. The register describing the destination
+is modified to take this into account. */
+
+movsbw: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ extsb r3,r3
+ rlwinm opreg,opreg,0,0x1c
+ sthbrx r3,REG
+ NEXT
+
+movsbl: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ extsb r3,r3
+ rlwinm opreg,opreg,0,0x1c
+ stwbrx r3,REG
+ NEXT
+
+ .equ movsww, movw_mem_reg
+
+movswl: lhbrx r3,MEM
+ extsh r3,r3
+ stwbrx r3,REG
+ NEXT
+
+movzbw: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ rlwinm opreg,opreg,0,0x1c
+ sthbrx r3,REG
+ NEXT
+
+movzbl: lbzx r3,MEM
+ rlwimi opreg,opreg,4,0x10
+ rlwinm opreg,opreg,0,0x1c
+ stwbrx r3,REG
+ NEXT
+
+ .equ movzww, movw_mem_reg
+
+movzwl: lhbrx r3,MEM
+ stwbrx r3,REG
+ NEXT
+
+/* Byte swapping */
+bswap: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lwbrx r0,REG
+ stwx r0,REG
+ NEXT
+
+/* Input/output */
+/* Port number in r4 (immediate byte or dx), access code in r3 for
+_check_port, then the access is made at iobase(state)+port. */
+inb_port_al: NEXTBYTE(r4)
+ b 1f
+inb_dx_al: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inb
+ bl _check_port
+ lwz r3,iobase(state)
+ lbzx r5,r4,r3
+ eieio
+ stb r5,AL(state)
+ NEXT
+
+inw_port_ax: NEXTBYTE(r4)
+ b 1f
+inw_dx_ax: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inw
+ bl _check_port
+ lwz r3,iobase(state)
+ lhzx r5,r4,r3
+ eieio
+ sth r5,AX(state)
+ NEXT
+
+inl_port_eax: NEXTBYTE(r4)
+ b 1f
+inl_dx_eax: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_inl 
+ bl _check_port
+ lwz r3,iobase(state)
+ lwzx r5,r4,r3
+ eieio
+ stw r5,EAX(state)
+ NEXT
+
+/* Output: same scheme as input, port number in r4, access code in r3 */
+outb_al_port: NEXTBYTE(r4)
+ b 1f
+outb_al_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outb
+ bl _check_port
+ lwz r3,iobase(state)
+ lbz r5,AL(state)
+ stbx r5,r4,r3
+ eieio
+ NEXT
+
+outw_ax_port: NEXTBYTE(r4)
+ b 1f
+outw_ax_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outw
+ bl _check_port
+ lwz r3,iobase(state)
+ lhz r5,AX(state)
+ sthx r5,r4,r3
+ eieio
+ NEXT
+
+outl_eax_port: NEXTBYTE(r4)
+ b 1f
+outl_eax_dx: li r4,DX
+ lhbrx r4,r4,state
+1: li r3,code_outl
+ bl _check_port
+/* r4 still holds the port number: iobase goes to r3 as in outb/outw */
+ lwz r3,iobase(state)
+ lwz r5,EAX(state)
+ stwx r5,r4,r3
+ eieio
+ NEXT
+
+
+/* Macro used for add and sub */
+/* op1 is always the destination operand and op2 the source operand */
+#define ARITH(op,fl) \
+op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(fl(B)); lbzx op2,REG; \
+ op result,op1,op2; \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,REG; \
+ op result,op1,op2; \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,REG; \
+ op result,op1,op2; \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg: lbzx op2,MEM; SET_FLAGS(fl(B)); lbzx op1,REG; \
+ op result,op1,op2; \
+ stbx result,REG; NEXT; \
+op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(fl(W)); lhbrx op1,REG; \
+ op result,op1,op2; \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(fl(L)); lwbrx op1,REG; \
+ op result,op1,op2; \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: lbzx op1,MEM; SET_FLAGS(fl(B)); lbz op2,1(eip); \
+ op result,op1,op2; \
+ lbzu opcode,2(eip); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,eip,one; \
+ op result,op1,op2; \
+ lbzu opcode,3(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); SET_FLAGS(fl(W)); lhbrx op1,MEM; \
+ extsb op2,op2; clrlwi op2,op2,16; \
+ op result,op1,op2; \
+ lbzu opcode,2(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,5(eip); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8: lbz op2,1(eip); SET_FLAGS(fl(L)); lwbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ stwbrx result,MEM; GOTNEXT
+
+ ARITH(add, FLAGS_ADD)
+ ARITH(sub, FLAGS_SUB)
+
+#define adc(result, op1, op2) adde result,op1,op2
+#define sbb(result, op1, op2) subfe result,op2,op1
+
+/*
+ * Macro used for adc and sbb. Rules followed by every variant:
+ * - carryfor##op reads result and op1 of the preceding operation, so it
+ * is called before op1 is reloaded; it leaves the carry in xer[ca]
+ * and only the carry in flags, the new flags are then added.
+ * - op1 is always the destination operand and op2 the source operand,
+ * as in ARITH, which matters for sbb since it is not commutative.
+ * NOTE(review): assumes RES2CF/CF_ROTCNT/CF_POL do not read op2, as the
+ * w_imm8 variant already relies on - confirm.
+ */
+#define ARITH_WITH_CARRY(op, fl) \
+op##b_reg_mem: bl carryfor##op; lbzx op1,MEM; lbzx op2,REG; \
+ ADD_FLAGS(fl(B)); op(result, op1, op2); \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: bl carryfor##op; lhbrx op1,MEM; lhbrx op2,REG; \
+ ADD_FLAGS(fl(W)); op(result, op1, op2); \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: bl carryfor##op; lwbrx op1,MEM; lwbrx op2,REG; \
+ ADD_FLAGS(fl(L)); op(result, op1, op2); \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg: lbzx op2,MEM; bl carryfor##op; lbzx op1,REG; \
+ ADD_FLAGS(fl(B)); op(result, op1, op2); \
+ stbx result,REG; NEXT; \
+op##w_mem_reg: lhbrx op2,MEM; bl carryfor##op; lhbrx op1,REG; \
+ ADD_FLAGS(fl(W)); op(result, op1, op2); \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg: lwbrx op2,MEM; bl carryfor##op; lwbrx op1,REG; \
+ ADD_FLAGS(fl(L)); op(result, op1, op2); \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: bl carryfor##op; lbzx op1,MEM; lbz op2,1(eip); \
+ ADD_FLAGS(fl(B)); lbzu opcode,2(eip); op(result, op1, op2); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: bl carryfor##op; lhbrx op1,MEM; lhbrx op2,eip,one; \
+ ADD_FLAGS(fl(W)); lbzu opcode,3(eip); op(result, op1, op2); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); bl carryfor##op; lhbrx op1,MEM; \
+ extsb op2,op2; ADD_FLAGS(fl(W)); clrlwi op2,op2,16; \
+ lbzu opcode,2(eip); op(result, op1, op2); \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: bl carryfor##op; lwbrx op1,MEM; lwbrx op2,eip,one; \
+ ADD_FLAGS(fl(L)); lbzu opcode,5(eip); op(result, op1, op2); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8: lbz op2,1(eip); bl carryfor##op; lwbrx op1,MEM; \
+ extsb op2,op2; ADD_FLAGS(fl(L)); lbzu opcode,2(eip); \
+ op(result, op1, op2); \
+ stwbrx result,MEM; GOTNEXT
+
+/*
+ * Computes the x86 carry of the preceding operation the same way as
+ * _eval_cf, leaves it in xer[ca] for adde and as CF_IN, the only bit
+ * kept in flags. Clobbers r3, r4 and r5.
+ */
+carryforadc: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # get 8 or 16 bit carry
+ subfe r3,result,op1 # generate PPC carry for
+ CF_ROTCNT(r5) # preceding operation
+ addze r3,r4 # 32 bit carry in LSB
+ CF_POL(r4,23) # polarity
+ rlwnm r3,r3,r5,0x100 # shift carry there
+ xor r3,r4,r3 # CF86 ? 0x100 : 0
+ addic r4,r3,0xffffff00 # set xer[ca]
+ rlwinm flags,r3,23,CF_IN
+ blr
+
+ ARITH_WITH_CARRY(adc, FLAGS_ADD)
+
+/* for sbb the input carry must be the complement of the x86 carry */
+carryforsbb: addc r3,flags,flags # CF_IN to xer[ca]
+ RES2CF(r4) # 8/16 bit carry from result
+ subfe r3,result,op1
+ CF_ROTCNT(r5)
+ addze r3,r4
+ CF_POL(r4,23)
+ rlwnm r3,r3,r5,0x100
+ eqv r3,r4,r3 # CF86 ? 
0xfffffeff:0xffffffff
+ addic r4,r3,1 # set xer[ca]
+ rlwinm flags,r3,23,CF_IN # keep only the carry
+ blr
+
+ ARITH_WITH_CARRY(sbb, FLAGS_SBB)
+
+/*
+ * cmp: op1-op2 goes to result but nothing is stored. cr4 gets the
+ * unsigned comparison and cr6 the signed comparison of the operands
+ * (sign extended first for the byte and word forms).
+ */
+cmpb_reg_mem: lbzx op1,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbzx op2,REG
+ extsb r3,op1
+ cmplw cr4,op1,op2
+ extsb r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpw_reg_mem: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op2,REG
+ extsh r3,op1
+ cmplw cr4,op1,op2
+ extsh r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpl_reg_mem: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op2,REG
+ cmplw cr4,op1,op2
+ sub result,op1,op2
+ cmpw cr6,op1,op2
+ NEXT
+
+cmpb_mem_reg: lbzx op2,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbzx op1,REG
+ extsb r4,op2
+ cmplw cr4,op1,op2
+ extsb r3,op1
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpw_mem_reg: lhbrx op2,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op1,REG
+ extsh r4,op2
+ cmplw cr4,op1,op2
+ extsh r3,op1
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ NEXT
+
+cmpl_mem_reg: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op1,REG
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ NEXT
+
+/* The al/ax/eax forms fall through after pointing MEM at the register */
+cmpb_imm_al: addi base,state,0
+ li offset,AL
+cmpb_imm: lbzx op1,MEM
+ SET_FLAGS(FLAGS_CMP(B))
+ lbz op2,1(eip)
+ extsb r3,op1
+ cmplw cr4,op1,op2
+ lbzu opcode,2(eip)
+ extsb r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ GOTNEXT
+
+cmpw_imm_ax: addi base,state,0
+ li offset,AX
+cmpw_imm: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op2,eip,one
+ extsh r3,op1
+ cmplw cr4,op1,op2
+ lbzu opcode,3(eip)
+ extsh r4,op2
+ sub result,op1,op2
+ cmpw cr6,r3,r4
+ GOTNEXT
+
+/* op2 is the sign extended byte truncated to 16 bits */
+cmpw_imm8: lbz op2,1(eip)
+ SET_FLAGS(FLAGS_CMP(W))
+ lhbrx op1,MEM
+ extsb r4,op2
+ extsh r3,op1
+ lbzu opcode,2(eip)
+ clrlwi op2,r4,16
+ cmpw cr6,r3,r4
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+
+cmpl_imm_eax: addi base,state,0
+ li offset,EAX
+cmpl_imm: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op2,eip,one
+ cmpw cr6,op1,op2
+ lbzu opcode,5(eip)
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+ 
+cmpl_imm8: lbz op2,1(eip)
+ SET_FLAGS(FLAGS_CMP(L))
+ lwbrx op1,MEM
+ extsb op2,op2
+ lbzu opcode,2(eip)
+ cmpw cr6,op1,op2
+ sub result,op1,op2
+ cmplw cr4,op1,op2
+ GOTNEXT
+
+/* Increment and decrement */
+/* The value is kept in op2; the store width always matches the load */
+incb: lbzx op2,MEM
+ INC_FLAGS(B)
+ addi op2,op2,1
+ stbx op2,MEM
+ NEXT
+
+incw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lhbrx op2,REG
+ INC_FLAGS(W)
+ addi op2,op2,1
+ sthbrx op2,REG
+ NEXT
+
+incw: lhbrx op2,MEM
+ INC_FLAGS(W)
+ addi op2,op2,1
+ sthbrx op2,MEM
+ NEXT
+
+incl_reg: clrlslwi opreg,opcode,29,2
+ lwbrx op2,REG
+ INC_FLAGS(L)
+ addi op2,op2,1
+ stwbrx op2,REG # all 32 bits are written back
+ NEXT
+
+incl: lwbrx op2,MEM
+ INC_FLAGS(L)
+ addi op2,op2,1
+ stwbrx op2,MEM
+ NEXT
+
+decb: lbzx op2,MEM
+ DEC_FLAGS(B)
+ addi op2,op2,-1
+ stbx op2,MEM
+ NEXT
+
+decw_reg: clrlslwi opreg,opcode,29,2 # extract reg from opcode
+ lhbrx op2,REG
+ DEC_FLAGS(W)
+ addi op2,op2,-1
+ sthbrx op2,REG
+ NEXT
+
+decw: lhbrx op2,MEM
+ DEC_FLAGS(W)
+ addi op2,op2,-1
+ sthbrx op2,MEM
+ NEXT
+
+decl_reg: clrlslwi opreg,opcode,29,2
+ lwbrx op2,REG
+ DEC_FLAGS(L)
+ addi op2,op2,-1
+ stwbrx op2,REG # all 32 bits are written back
+ NEXT
+
+decl: lwbrx op2,MEM
+ DEC_FLAGS(L)
+ addi op2,op2,-1
+ stwbrx op2,MEM
+ NEXT
+
+/* neg is handled as the subtraction 0-op2: op1 is set to 0 */
+negb: lbzx op2,MEM
+ SET_FLAGS(FLAGS_SUB(B))
+ neg result,op2
+ li op1,0
+ stbx result,MEM
+ NEXT
+
+negw: lhbrx op2,MEM
+ SET_FLAGS(FLAGS_SUB(W))
+ neg result,op2
+ li op1,0
+ sthbrx result,MEM # store the negated value, as negb/negl
+ NEXT
+
+negl: lwbrx op2,MEM
+ SET_FLAGS(FLAGS_SUB(L))
+ subfic result,op2,0
+ li op1,0
+ stwbrx result,MEM
+ NEXT
+
+/* Macro used to generate code for OR/AND/XOR */
+#define LOGICAL(op) \
+op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
+ op result,op1,op2; \
+ stbx result,MEM; NEXT; \
+op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,REG; \
+ op result,op1,op2; \
+ sthbrx result,MEM; NEXT; \
+op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,REG; \
+ op result,op1,op2; \
+ stwbrx result,MEM; NEXT; \
+op##b_mem_reg: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx 
op2,REG; \
+ op result,op1,op2; \
+ stbx result,REG; NEXT; \
+op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,REG; \
+ op result,op1,op2; \
+ sthbrx result,REG; NEXT; \
+op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,REG; \
+ op result,op1,op2; \
+ stwbrx result,REG; NEXT; \
+op##b_imm_al: addi base,state,0; li offset,AL; \
+op##b_imm: lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbz op2,1(eip); \
+ op result,op1,op2; lbzu opcode,2(eip); \
+ stbx result,MEM; GOTNEXT; \
+op##w_imm_ax: addi base,state,0; li offset,AX; \
+op##w_imm: lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,3(eip); \
+ sthbrx result,MEM; GOTNEXT; \
+op##w_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ sthbrx result,MEM; GOTNEXT; \
+op##l_imm_eax: addi base,state,0; li offset,EAX; \
+op##l_imm: lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,eip,one; \
+ op result,op1,op2; lbzu opcode,5(eip); \
+ stwbrx result,MEM; GOTNEXT; \
+op##l_imm8: lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,MEM; \
+ extsb op2,op2; lbzu opcode,2(eip); \
+ op result,op1,op2; \
+ stwbrx result,MEM; GOTNEXT
+
+ LOGICAL(or)
+
+ LOGICAL(and)
+
+ LOGICAL(xor)
+
+/*
+ * test: and of the two operands with nothing stored; cr6 is set at
+ * once by comparing the (sign extended) result with 0.
+ */
+testb_reg_mem: lbzx op1,MEM
+ SET_FLAGS(FLAGS_TEST(B))
+ lbzx op2,REG
+ and result,op1,op2
+ extsb r3,result
+ cmpwi cr6,r3,0
+ NEXT
+
+testw_reg_mem: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(W))
+ lhbrx op2,REG
+ and result,op1,op2
+ extsh r3,result
+ cmpwi cr6,r3,0
+ NEXT
+
+/* operands are loaded in op1/op2, the registers which are and'ed */
+testl_reg_mem: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(L))
+ lwbrx op2,REG
+ and result,op1,op2
+ cmpwi cr6,result,0
+ NEXT
+
+testb_imm_al: addi base,state,0
+ li offset,AL
+testb_imm: lbzx op1,MEM
+ SET_FLAGS(FLAGS_TEST(B))
+ lbz op2,1(eip)
+ and result,op1,op2
+ lbzu opcode,2(eip)
+ extsb r3,result
+ cmpwi cr6,r3,0
+ GOTNEXT
+
+testw_imm_ax: addi base,state,0
+ li offset,AX
+testw_imm: lhbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(W))
+ lhbrx 
op2,eip,one
+ and result,op1,op2
+ lbzu opcode,3(eip)
+ extsh r3,result
+ cmpwi cr6,r3,0
+ GOTNEXT
+
+testl_imm_eax: addi base,state,0
+ li offset,EAX
+/* the and must use op1/op2, the registers which were just loaded */
+testl_imm: lwbrx op1,MEM
+ SET_FLAGS(FLAGS_TEST(L))
+ lwbrx op2,eip,one
+ and result,op1,op2
+ lbzu opcode,5(eip)
+ cmpwi cr6,result,0
+ GOTNEXT
+
+/* Not does not affect flags */
+/* Complementing every bit does not depend on byte order: no byte swap */
+notb: lbzx r3,MEM
+ xori r3,r3,255
+ stbx r3,MEM
+ NEXT
+
+notw: lhzx r3,MEM
+ xori r3,r3,65535
+ sthx r3,MEM
+ NEXT
+
+notl: lwzx r3,MEM
+ not r3,r3
+ stwx r3,MEM
+ NEXT
+
+/*
+ * bound: signed comparison of the register with the two values at MEM
+ * (lower first, upper next). Goes to complex with code_bound in r3 when
+ * the register is below the first or above the second, to nop otherwise.
+ */
+boundw: lhbrx r4,REG
+ li r3,code_bound
+ lhbrx r5,MEM
+ addi offset,offset,2
+ extsh r4,r4
+ lhbrx r6,MEM
+ extsh r5,r5
+ cmpw r4,r5
+ extsh r6,r6
+ blt- complex
+ cmpw r4,r6
+ ble+ nop
+ b complex
+
+boundl: lwbrx r4,REG
+ li r3,code_bound
+ lwbrx r5,MEM
+ addi offset,offset,4
+ lwbrx r6,MEM
+ cmpw r4,r5
+ blt- complex
+ cmpw r4,r6
+ ble+ nop
+ b complex
+
+/* Bit test and modify instructions */
+
+/* Common routine: bit index in op2, returns memory value in r3, mask in op2,
+and of mask and value in op1. CF flag is set as with 32 bit add when bit is
+non zero since result (which is cleared) will be less than op1, and in cr4,
+all other flags are undefined from Intel doc. Here OF and SF are cleared
+and ZF is set as a side effect of result being cleared. 
*/ +_setup_bitw: cmpw base,state + SET_FLAGS(FLAGS_BTEST) + extsh op2,op2 + beq- 1f + srawi r4,op2,4 + add offset,offset,r4 +1: clrlwi op2,op2,28 # true bit index + lhbrx r3,MEM + slw op2,one,op2 # build mask + li result,0 # implicitly sets CF + and op1,r3,op2 # if resultxer[ca] + RES2CF(r6) + subfe r4,result,op1 + mulli r5,r3,29 # 29=ceil(256/9) + CF_ROTCNT(r7) + addze r6,r6 + CF_POL_INSERT(r0,23) + srwi r5,r5,8 # count/9 + rlwnm r6,r6,r7,0x100 + xor r0,r0,r6 # (23)0:CF:data8 + rlwimi r5,r5,3,26,28 # 9*(count/9) + rlwimi r0,r0,23,0,7 # CF:(data8):(14)0:CF:data8 + sub r3,r3,r5 # count%9 + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + rlwnm r0,r0,r3,0x000001ff # (23)0:NewCF:Result8 + rlwimi flags,r0,19,CF_VALUE + stbx r0,MEM + rlwimi flags,r0,18,OF_XOR + NEXT + +/* Word rcl is performed on 33 bits (CF:data16:CF:(15 MSB of data16) */ +rclw_imm: NEXTBYTE(r3) + b 1f +rclw_cl: lbz r3,CL(state) + b 1f +rclw_1: li r3,1 +1: lhbrx r0,MEM + andi. r3,r3,31 # count=count%32 + addc r4,flags,flags + RES2CF(r6) + subfe r4,result,op1 + addi r5,r3,15 # modulo 17: >=32 if >=17 + CF_ROTCNT(r7) + addze r6,r6 + addi r7,r7,8 + CF_POL_INSERT(r0,15) + srwi r5,r5,5 # count/17 + rlwnm r6,r6,r7,0x10000 + rlwimi r5,r5,4,27,27 # 17*(count/17) + xor r0,r0,r6 # (15)0:CF:data16 + sub r3,r3,r5 # count%17 + rlwinm r4,r0,15,0xffff0000 # CF:(15 MSB of data16):(16)0 + slw r0,r0,r3 # New carry and MSBs + rlwnm r4,r4,r3,16,31 # New LSBs + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + add r0,r0,r4 # result + rlwimi flags,r0,11,CF_VALUE + sthbrx r0,MEM + rlwimi flags,r0,10,OF_XOR + NEXT + +/* Longword rcl only needs 64 bits because the maximum rotate count is 31 ! */ +rcll_imm: NEXTBYTE(r3) + b 1f +rcll_cl: lbz r3,CL(state) + b 1f +rcll_1: li r3,1 +1: lwbrx r0,MEM + andi. 
r3,r3,31 # count=count%32 + addc r4,r4,flags # ~XER[CA] + RES2CF(r6) + subfe r4,result,op1 + CF_ROTCNT(r7) + addze r6,r6 + srwi r4,r0,1 # 0:(31 MSB of data32) + addi r7,r7,23 + CF_POL_INSERT(r4,0) + rlwnm r6,r6,r7,0,0 + beq- nop # no flags changed if count 0 + subfic r5,r3,32 + xor r4,r4,r6 + ROTATE_FLAGS + slw r0,r0,r3 # New MSBs + srw r5,r4,r5 # New LSBs + rlwnm r4,r4,r3,0,0 # New Carry + add r0,r0,r5 # result + rlwimi flags,r4,28,CF_VALUE + rlwimi flags,r0,27,OF_XOR + stwbrx r0,MEM + NEXT + +/* right rotates through carry are even worse because PPC only has a left +rotate instruction. Somewhat tough when combined with modulo 9, 17, or +33 operation and the rules of OF and CF flag settings. */ +/* Byte rcr is performed on 17 bits */ +rcrb_imm: NEXTBYTE(r3) + b 1f +rcrb_cl: lbz r3,CL(state) + b 1f +rcrb_1: li r3,1 +1: lbzx r0,MEM + andi. r3,r3,31 # count%32 + addc r4,flags,flags # cf_in->xer[ca] + RES2CF(r6) + mulli r5,r3,29 # 29=ceil(256/9) + subfe r4,result,op1 + CF_ROTCNT(r7) + addze r6,r6 + CF_POL_INSERT(r0,23) + srwi r5,r5,8 # count/9 + rlwimi r0,r0,9,0x0001fe00 # (15)0:data8:0:data8 + rlwnm r6,r6,r7,0x100 + rlwimi r5,r5,3,26,28 # 9*(count/9) + xor r0,r0,r6 # (15)0:data8:CF:data8 + sub r3,r3,r5 # count%9 + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + srw r0,r0,r3 # (23)junk:NewCF:Result8 + rlwimi flags,r0,19,CF_VALUE|OF_XOR + stbx r0,MEM + NEXT + +/* Word rcr is a 33 bit right shift with a quirk, because the 33rd bit +is only needed when the rotate count is 16 and rotating left or right +by 16 a 32 bit quantity is the same ! */ +rcrw_imm: NEXTBYTE(r3) + b 1f +rcrw_cl: lbz r3,CL(state) + b 1f +rcrw_1: li r3,1 +1: lhbrx r0,MEM + andi. 
r3,r3,31 # count%32 + addc r4,flags,flags # cf_in->xer[ca] + RES2CF(r6) + subfe r4,result,op1 + addi r5,r3,15 # >=32 if >=17 + CF_ROTCNT(r7) + addze r6,r6 + addi r7,r7,8 + CF_POL_INSERT(r0,15) + srwi r5,r5,5 # count/17 + rlwnm r6,r6,r7,0x10000 + rlwinm r7,r0,16,0x01 # MSB of data16 + rlwimi r0,r0,17,0xfffe0000 # (15 MSB of data16):0:data16 + rlwimi r5,r5,4,27,27 # 17*(count/17) + xor r0,r0,r6 # (15 MSB of data16):CF:data16 + sub r3,r3,r5 # count%17 + beq- nop # no flags changed if count 0 + srw r0,r0,r3 # shift right + rlwnm r7,r7,r3,0x10000 # just in case count=16 + ROTATE_FLAGS + add r0,r0,r7 # junk15:NewCF:result16 + rlwimi flags,r0,11,CF_VALUE|OF_XOR + sthbrx r0,MEM + NEXT + +/* Longword rcr need only 64 bits since the rotate count is limited to 31 */ +rcrl_imm: NEXTBYTE(r3) + b 1f +rcrl_cl: lbz r3,CL(state) + b 1f +rcrl_1: li r3,1 +1: lwbrx r0,MEM + andi. r3,r3,31 # count%32 + addc r4,flags,flags + RES2CF(r6) + subfe r4,result,op1 + CF_ROTCNT(r7) + slwi r4,r0,1 # (31MSB of data32):0 + addze r6,r6 + addi r7,r7,24 + CF_POL_INSERT(r4,31) + rlwnm r6,r6,r7,0x01 + beq- nop # no flags changed if count 0 + subfic r7,r3,32 + xor r4,r4,r6 + srw r0,r0,r3 # Result LSB + slw r5,r4,r7 # Result MSB + srw r4,r4,r3 # NewCF in LSB + add r0,r0,r5 # result + rlwimi flags,r4,27,CF_VALUE + stwbrx r0,MEM + rlwimi flags,r0,27,OF_XOR + NEXT + +/* After the rotates through carry, normal rotates are so simple ! */ +rolb_imm: NEXTBYTE(r3) + b 1f +rolb_cl: lbz r3,CL(state) + b 1f +rolb_1: li r3,1 +1: lbzx r0,MEM + andi. r4,r3,31 # count%32 == 0 ? + clrlwi r3,r3,29 # count%8 + rlwimi r0,r0,24,0xff000000 # replicate for shift in + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + rotlw r0,r0,r3 + rlwimi flags,r0,27,CF_VALUE # New CF + stbx r0,MEM + rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) + NEXT + +rolw_imm: NEXTBYTE(r3) + b 1f +rolw_cl: lbz r3,CL(state) + b 1f +rolw_1: li r3,1 +1: lhbrx r0,MEM + andi. 
r3,r3,31 + rlwimi r0,r0,16,0,15 # duplicate + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + rotlw r0,r0,r3 # result word duplicated + rlwimi flags,r0,27,CF_VALUE # New CF + sthbrx r0,MEM + rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) + NEXT + +roll_imm: NEXTBYTE(r3) + b 1f +roll_cl: lbz r3,CL(state) + b 1f +roll_1: li r3,1 +1: lwbrx r0,MEM + andi. r3,r3,31 + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + rotlw r0,r0,r3 # result + rlwimi flags,r0,27,CF_VALUE # New CF + stwbrx r0,MEM + rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) + NEXT + +rorb_imm: NEXTBYTE(r3) + b 1f +rorb_cl: lbz r3,CL(state) + b 1f +rorb_1: li r3,1 +1: lbzx r0,MEM + andi. r4,r3,31 # count%32 == 0 ? + clrlwi r3,r3,29 # count%8 + rlwimi r0,r0,8,0x0000ff00 # replicate for shift in + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + srw r0,r0,r3 + rlwimi flags,r0,20,CF_VALUE + stbx r0,MEM + rlwimi flags,r0,19,OF_XOR + NEXT + +rorw_imm: NEXTBYTE(r3) + b 1f +rorw_cl: lbz r3,CL(state) + b 1f +rorw_1: li r3,1 +1: lhbrx r0,MEM + andi. r4,r3,31 + clrlwi r3,r3,28 # count %16 + rlwimi r0,r0,16,0xffff0000 # duplicate + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + srw r0,r0,r3 # junk16:result16 + rlwimi flags,r0,12,CF_VALUE + sthbrx r0,MEM + rlwimi flags,r0,11,OF_XOR + NEXT + +rorl_imm: NEXTBYTE(r3) + b 1f +rorl_cl: lbz r3,CL(state) + b 1f +rorl_1: li r3,1 +1: lwbrx r0,MEM + andi. r4,r3,31 + neg r3,r3 + beq- nop # no flags changed if count 0 + ROTATE_FLAGS + rotlw r0,r0,r3 # result + rlwimi flags,r0,28,CF_VALUE + stwbrx r0,MEM + rlwimi flags,r0,27,OF_XOR + NEXT + +/* Right arithmetic shifts: they clear OF whenever count!=0 */ +#define SAR_FLAGS CF_ZERO|OF_ZERO|RESL +sarb_imm: NEXTBYTE(r3) + b 1f +sarb_cl: lbz r3,CL(state) + b 1f +sarb_1: li r3,1 +1: lbzx r4,MEM + andi. 
r3,r3,31 + addi r5,r3,-1 + extsb r4,r4 + beq- nop # no flags changed if count 0 + SET_FLAGS(SAR_FLAGS) + sraw result,r4,r3 + srw r5,r4,r5 + stbx result,MEM + rlwimi flags,r5,27,CF_VALUE + NEXT + +sarw_imm: NEXTBYTE(r3) + b 1f +sarw_cl: lbz r3,CL(state) + b 1f +sarw_1: li r3,1 +1: lhbrx r4,MEM + andi. r3,r3,31 + addi r5,r3,-1 + extsh r4,r4 + beq- nop # no flags changed if count 0 + SET_FLAGS(SAR_FLAGS) + sraw result,r4,r3 + srw r5,r4,r5 + sthbrx result,MEM + rlwimi flags,r5,27,CF_VALUE + NEXT + +sarl_imm: NEXTBYTE(r3) + b 1f +sarl_cl: lbz r3,CL(state) + b 1f +sarl_1: li r3,1 +1: lwbrx r4,MEM + andi. r3,r3,31 + addi r5,r3,-1 + beq- nop # no flags changed if count 0 + SET_FLAGS(SAR_FLAGS) + sraw result,r4,r3 + srw r5,r4,r5 + stwbrx result,MEM + rlwimi flags,r5,27,CF_VALUE + NEXT + +/* Left shifts are quite easy: they use the flag mechanism of add */ +shlb_imm: NEXTBYTE(r3) + b 1f +shlb_cl: lbz r3,CL(state) + b 1f +shlb_1: li r3,1 +1: andi. r3,r3,31 + beq- nop # no flags changed if count 0 + lbzx op1,MEM + SET_FLAGS(FLAGS_ADD(B)) + slw result,op1,r3 + addi op2,op1,0 # for OF computation only ! + stbx result,MEM + NEXT + +shlw_imm: NEXTBYTE(r3) + b 1f +shlw_cl: lbz r3,CL(state) + b 1f +shlw_1: li r3,1 +1: andi. r3,r3,31 + beq- nop # no flags changed if count 0 + lhbrx op1,MEM + SET_FLAGS(FLAGS_ADD(W)) + slw result,op1,r3 + addi op2,op1,0 # for OF computation only ! + sthbrx result,MEM + NEXT + +/* That one may be wrong */ +shll_imm: NEXTBYTE(r3) + b 1f +shll_cl: lbz r3,CL(state) + b 1f +shll_1: li r3,1 +1: andi. r3,r3,31 + beq- nop # no flags changed if count 0 + lwbrx op1,MEM + addi r4,r3,-1 + SET_FLAGS(FLAGS_ADD(L)) + slw result,op1,r3 + addi op2,op1,0 # for OF computation only ! + slw op1,op1,r4 # for CF computation + stwbrx result,MEM + NEXT + +/* Right shifts are quite complex, because of funny flag rules ! */ +shrb_imm: NEXTBYTE(r3) + b 1f +shrb_cl: lbz r3,CL(state) + b 1f +shrb_1: li r3,1 +1: andi. 
r3,r3,31 + beq- nop # no flags changed if count 0 + lbzx op1,MEM + addi r4,r3,-1 + SET_FLAGS(FLAGS_SHR(B)) + srw result,op1,r3 + srw r4,op1,r4 + li op2,-1 # for OF computation only ! + stbx result,MEM + rlwimi flags,r4,27,CF_VALUE # Set CF + NEXT + +shrw_imm: NEXTBYTE(r3) + b 1f +shrw_cl: lbz r3,CL(state) + b 1f +shrw_1: li r3,1 +1: andi. r3,r3,31 + beq- nop # no flags changed if count 0 + lhbrx op1,MEM + addi r4,r3,-1 + SET_FLAGS(FLAGS_SHR(W)) + srw result,op1,r3 + srw r4,op1,r4 + li op2,-1 # for OF computation only ! + sthbrx result,MEM + rlwimi flags,r4,27,CF_VALUE # Set CF + NEXT + +shrl_imm: NEXTBYTE(r3) + b 1f +shrl_cl: lbz r3,CL(state) + b 1f +shrl_1: li r3,1 +1: andi. r3,r3,31 + beq- nop # no flags changed if count 0 + lwbrx op1,MEM + addi r4,r3,-1 + SET_FLAGS(FLAGS_SHR(L)) + srw result,op1,r3 + srw r4,op1,r4 + li op2,-1 # for OF computation only ! + stwbrx result,MEM + rlwimi flags,r4,27,CF_VALUE # Set CF + NEXT + +/* Double length shifts, shldw uses FLAGS_ADD for simplicity */ +shldw_imm: NEXTBYTE(r3) + b 1f +shldw_cl: lbz r3,CL(state) +1: andi. r3,r3,31 + beq- nop + lhbrx op1,MEM + SET_FLAGS(FLAGS_ADD(W)) + lhbrx op2,REG + rlwimi op1,op2,16,0,15 # op2:op1 + addi op2,op1,0 + rotlw result,op1,r3 + sthbrx result,MEM + NEXT + +shldl_imm: NEXTBYTE(r3) + b 1f +shldl_cl: lbz r3,CL(state) +1: andi. r3,r3,31 + beq- nop + lwbrx op1,MEM + SET_FLAGS(FLAGS_DBLSH(L)) + lwbrx op2,REG + subfic r4,r3,32 + slw result,op1,r3 + srw r4,op2,r4 + rotlw r3,op1,r3 + or result,result,r4 + addi op2,op1,0 + rlwimi flags,r3,27,CF_VALUE + stwbrx result,MEM + NEXT + +shrdw_imm: NEXTBYTE(r3) + b 1f +shrdw_cl: lbz r3,CL(state) +1: andi. r3,r3,31 + beq- nop + lhbrx op1,MEM + SET_FLAGS(FLAGS_DBLSH(W)) + lhbrx op2,REG + addi r4,r3,-1 + rlwimi op1,op2,16,0,15 # op2:op1 + addi op2,op1,0 + srw result,op1,r3 + srw r4,op1,r4 + sthbrx result,MEM + rlwimi flags,r4,27,CF_VALUE + NEXT + +shrdl_imm: NEXTBYTE(r3) + b 1f +shrdl_cl: lbz r3,CL(state) +1: andi. 
r3,r3,31 + beq- nop + lwbrx op1,MEM + SET_FLAGS(FLAGS_DBLSH(L)) + lwbrx op2,REG + subfic r4,r3,32 + srw result,op1,r3 + addi r3,r3,-1 + slw r4,op2,r4 + srw r3,op1,r3 + or result,result,r4 + addi op2,op1,0 + rlwimi flags,r3,27,CF_VALUE + stwbrx result,MEM + NEXT + +/* One operand multiplies: with result double the operand size, unsigned */ +mulb: lbzx op2,MEM + lbz op1,AL(state) + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + subfic r3,result,255 + sthbrx result,AX,state + rlwimi flags,r3,0,CF_VALUE|OF_VALUE + NEXT + +mulw: lhbrx op2,MEM + lhbrx op1,AX,state + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + li r4,DX + srwi r3,result,16 + sthbrx result,AX,state + neg r5,r3 + sthbrx r3,r4,state # DX + rlwimi flags,r5,0,CF_VALUE|OF_VALUE + NEXT + +mull: lwbrx op2,MEM + lwbrx op1,EAX,state + mullw result,op1,op2 + mulhwu. r3,op1,op2 + SET_FLAGS(FLAGS_MUL) + stwbrx result,EAX,state + li r4,EDX + stwbrx r3,r4,state + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT + +/* One operand multiplies: with result double the operand size, signed */ +imulb: lbzx op2,MEM + extsb op2,op2 + lbz op1,AL(state) + extsb op1,op1 + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + extsb r3,result + sthbrx result,AX,state + cmpw r3,result + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT + +imulw: lhbrx op2,MEM + extsh op2,op2 + lhbrx op1,AX,state + extsh op1,op1 + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + li r3,DX + extsh r4,result + srwi r5,result,16 + sthbrx result,AX,state + cmpw r4,result + sthbrx r5,r3,state + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT + +imull: lwbrx op2,MEM + SET_FLAGS(FLAGS_MUL) + lwbrx op1,EAX,state + li r3,EDX + mulhw r4,op1,op2 + mullw result,op1,op2 + stwbrx r4,r3,state + srawi r3,result,31 + cmpw r3,r4 + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT + +/* Other multiplies */ +imulw_mem_reg: lhbrx op2,REG + extsh op2,op2 + b 1f + +imulw_imm: NEXTWORD(op2) + extsh op2,op2 + b 1f + +imulw_imm8: NEXTBYTE(op2) + extsb op2,op2 +1: 
lhbrx op1,MEM + extsh op1,op1 + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + extsh r3,result + sthbrx result,REG + cmpw r3,result + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT # SF/ZF/AF/PF undefined ! + +imull_mem_reg: lwbrx op2,REG + b 1f + +imull_imm: NEXTDWORD(op2) + b 1f + +imull_imm8: NEXTBYTE(op2) + extsb op2,op2 +1: lwbrx op1,MEM + mullw result,op1,op2 + SET_FLAGS(FLAGS_MUL) + mulhw r3,op1,op2 + srawi r4,result,31 + stwbrx result,REG + cmpw r3,r4 + beq+ nop + oris flags,flags,(CF_SET|OF_SET)>>16 + NEXT # SF/ZF/AF/PF undefined ! + +/* aad is indeed a multiply */ +aad: NEXTBYTE(r3) + lbz op1,AH(state) + lbz op2,AL(state) + mullw result,op1,r3 # AH*imm + SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from result + add result,result,op2 # AH*imm+AL + slwi r3,result,8 + sth r3,AX(state) # AH=0 + NEXT # OF/AF/CF undefined + +/* Unsigned divides: we may destroy all flags */ +divb: lhbrx r4,AX,state + lbzx r3,MEM + srwi r5,r4,8 + cmplw r5,r3 + bnl- _divide_error + divwu r5,r4,r3 + mullw r3,r5,r3 + sub r3,r4,r3 + stb r5,AL(state) + stb r3,AH(state) + NEXT + +divw: li opreg,DX + lhbrx r4,AX,state + lhbrx r5,REG + lhbrx r3,MEM + insrwi r4,r5,16,0 + cmplw r5,r3 + bnl- _divide_error + divwu r5,r4,r3 + mullw r3,r5,r3 + sub r3,r4,r3 + sthbrx r5,AX,state + sthbrx r3,REG + NEXT + +divl: li opreg,EDX # Not yet fully implemented + lwbrx r3,MEM + lwbrx r4,REG + lwbrx r5,EAX,state + cmplw r4,r3 + bnl- _divide_error + cmplwi r4,0 + bne- 1f + divwu r4,r5,r3 + mullw r3,r4,r3 + stwbrx r4,EAX,state + sub r3,r5,r3 + stwbrx r3,REG + NEXT +/* full implementation of 64:32 unsigned divide, slow but rarely used */ +1: bl _div_64_32 + stwbrx r5,EAX,state + stwbrx r4,REG + NEXT +/* + * Divide r4:r5 by r3, quotient in r5, remainder in r4. + * The algorithm is stupid because it won't be used very often. 
+ */
+_div_64_32:     li r7,32
+                mtctr r7                # one loop iteration per quotient bit
+1:              cmpwi r4,0              # always subtract in case
+                addc r5,r5,r5           # MSB is set
+                adde r4,r4,r4
+                blt 2f
+                cmplw r4,r3
+                blt 3f
+2:              sub r4,r4,r3
+                addi r5,r5,1
+3:              bdnz 1b
+                blr                     # callers use bl: return instead of
+                                        # falling through into idivb
+
+/* Signed divides: we may destroy all flags */
+/* lbzx and lhbrx zero extend what they load, and divw is a signed 32 bit
+ * divide, so the byte and word forms sign extend their operands first.
+ * The quotient is then checked to fit the destination size and the
+ * remainder is rebuilt as dividend-quotient*divisor. */
+idivb:          lbzx r3,MEM
+                lhbrx r4,AX,state
+                extsb r3,r3             # divisor is a signed byte
+                extsh r4,r4             # dividend is the signed 16 bit AX
+                cmpwi r3,0
+                beq- _divide_error
+                divw r5,r4,r3
+                extsb r7,r5
+                mullw r3,r5,r3
+                cmpw r5,r7              # quotient must fit in a signed byte
+                sub r3,r4,r3            # remainder
+                bne- _divide_error
+                stb r5,AL(state)
+                stb r3,AH(state)
+                NEXT
+
+idivw:          li opreg,DX
+                lhbrx r4,AX,state
+                lhbrx r5,REG
+                lhbrx r3,MEM
+                insrwi r4,r5,16,0       # r4 = r5:AX, a full 32 bit dividend
+                extsh r3,r3             # divisor is a signed 16 bit value
+                cmpwi r3,0
+                beq- _divide_error
+                divw r5,r4,r3
+                extsh r7,r5
+                mullw r3,r5,r3
+                cmpw r5,r7              # quotient must fit in 16 signed bits
+                sub r3,r4,r3            # remainder
+                bne- _divide_error
+                sthbrx r5,AX,state
+                sthbrx r3,REG
+                NEXT
+
+idivl:          li opreg,EDX            # Not yet fully implemented
+                lwbrx r3,MEM
+                lwbrx r5,EAX,state
+                cmpwi cr1,r3,0          # zero divisor test, result in cr1
+                lwbrx r4,REG
+                srwi r7,r5,31
+                beq- cr1,_divide_error  # test cr1: cr0 is not set by the
+                                        # compare above
+                add. r7,r7,r4
+                bne- 1f                 # EDX not sign extension of EAX
+                divw r4,r5,r3
+                xoris r7,r5,0x8000      # only overflow case is
+                orc. r7,r7,r3           # 0x80000000 divided by -1
+                mullw r3,r4,r3
+                beq- _divide_error
+                stwbrx r4,EAX,state
+                sub r3,r5,r3
+                stwbrx r3,REG
+                NEXT
+
+/* full 64 by 32 signed divide, checks for overflow might be right now */
+1:              srawi r6,r4,31          # absolute value of r4:r5
+                srawi r0,r3,31          # absolute value of r3
+                xor r5,r5,r6
+                xor r3,r3,r0
+                subfc r5,r6,r5
+                xor r4,r4,r6
+                sub r3,r3,r0
+                subfe r4,r6,r4
+                xor r0,r0,r6            # sign of result
+                cmplw r4,r3             # coarse overflow detection
+                bnl- _divide_error      # (probably not necessary)
+                bl _div_64_32
+                xor r5,r5,r0            # apply sign to result
+                sub r5,r5,r0
+                xor.
r7,r0,r5 # wrong sign: overflow + xor r4,r4,r6 # apply sign to remainder + blt- _divide_error + stwbrx r5,EAX,state + sub r4,r4,r6 + stwbrx r4,REG + NEXT + +/* aam is indeed a divide */ +aam: NEXTBYTE(r3) + lbz r4,AL(state) + cmpwi r3,0 + beq- _divide_error # zero divide + divwu op2,r4,r3 # AL/imm8 + SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from AL + mullw r3,op2,r3 # (AL/imm8)*imm8 + stb op2,AH(state) + sub result,r4,r3 # AL-imm8*(AL/imm8) + stb result,AL(state) + NEXT # OF/AF/CF undefined + +_divide_error: li r3,code_divide_err + b complex + +/* Instructions dealing with segment registers */ +pushw_sp_sr: li r3,SP + rlwinm opreg,opcode,31,27,29 + addi r5,state,SELECTORS+2 + lhbrx r4,state,r3 + lhzx r0,r5,opreg + addi r4,r4,-2 + sthbrx r4,state,r3 + clrlwi r4,r4,16 + sthbrx r0,r4,ssb + NEXT + +pushl_sp_sr: li r3,SP + rlwinm opreg,opcode,31,27,29 + addi r5,state,SELECTORS+2 + lhbrx r4,state,r3 + lhzx r0,r5,opreg + addi r4,r4,-4 + sthbrx r4,state,r3 + clrlwi r4,r4,16 + stwbrx r0,r4,ssb + NEXT + +movl_sr_mem: cmpwi opreg,20 + addi opreg,opreg,SELECTORS+2 + cmpw cr1,base,state # Only registers are sensitive + bgt- ud # to word/longword difference + lhzx r0,REG + bne cr1,1f + stwbrx r0,MEM # Actually a register + NEXT + +movw_sr_mem: cmpwi opreg,20 # SREG 0 to 5 only + addi opreg,opreg,SELECTORS+2 + bgt- ud + lhzx r0,REG +1: sthbrx r0,MEM + NEXT + +/* Now the instructions that modify the segment registers, note that +move/pop to ss disable interrupts and traps for one instruction ! */ +popl_sp_sr: li r6,4 + b 1f +popw_sp_sr: li r6,2 +1: li r7,SP + rlwinm opreg,opcode,31,27,29 + lhbrx offset,state,r7 + addi opreg,opreg,SELBASES + lhbrx r4,ssb,offset # new selector + add offset,offset,r6 + bl _segment_load + sthbrx offset,state,r7 # update sp + cmpwi opreg,8 # is ss ? 
+                stwux r3,REG
+                stw r4,SELECTORS-SELBASES(opreg)
+                lwz esb,esbase(state)
+                bne+ nop
+                lwz ssb,ssbase(state)   # pop ss
+                crmove RF,TF            # prevent traps
+                NEXT
+
+/* Move a 16 bit operand into a segment register. opreg values above 20
+ * and the value 4 (CS) are rejected through ud. The selector is read into
+ * r4 and handed to _segment_load; afterwards r3 is stored in the SELBASES
+ * slot and r4 in the matching SELECTORS slot. The esb shadow is always
+ * reloaded; when opreg is 8 the ssb shadow is reloaded as well and TF is
+ * copied into RF. */
+movw_mem_sr:    cmpwi opreg,20
+                addi r7,state,SELBASES
+                bgt- ud
+                cmpwi opreg,4           # CS illegal
+                beq- ud
+                lhbrx r4,MEM
+                bl _segment_load
+                stwux r3,r7,opreg
+                cmpwi opreg,8
+                stw r4,SELECTORS-SELBASES(r7)
+                lwz esb,esbase(state)
+                bne+ nop
+                lwz ssb,ssbase(state)
+                crmove RF,TF            # prevent traps
+                NEXT
+
+                .equ movl_mem_sr, movw_mem_sr
+
+/* The encoding of les/lss/lds/lfs/lgs is strange, opcode is c4/b2/c5/b4/b5
+for es/ss/ds/fs/gs which are sreg 0/2/3/4/5. And obviously there is
+no lcs instruction, it's called a far jump. */
+
+/* Both entry points load the offset part into r7, put the size of that
+ * offset (4 or 2) in r4 and call the shared helper below, which loads the
+ * segment register; the offset is stored to the destination register only
+ * after the helper returns. */
+ldlptrl:        lwzux r7,MEM
+                li r4,4
+                bl 1f
+                stwx r7,REG
+                NEXT
+ldlptrw:        lhzux r7,MEM
+                li r4,2
+                bl 1f
+                sthx r7,REG
+                NEXT
+
+/* Shared helper: maps the opcode to sreg*4 with a constant used as a small
+ * lookup table, reads the selector that follows the offset, and updates
+ * the segment base and selector. LR is kept in r0 across the nested call
+ * to _segment_load. */
+1:              cmpw base,state
+                lis r3,0xc011           # es/ss/ds/fs/gs
+                rlwinm r5,opcode,2,0x0c # 00/08/04/00/04
+                mflr r0                 # save return address
+                addi r3,r3,0x4800       # r3=0xc0114800
+                rlwimi r5,opcode,0,0x10 # 00/18/04/10/14
+                lhbrx r4,r4,offset
+                rlwnm opcode,r3,r5,0x1c # 00/08/0c/10/14 = sreg*4 !
+                beq- ud                 # Only mem operands allowed !
+                bl _segment_load
+                addi r5,opcode,SELBASES
+                stwux r3,r5,state
+                mtlr r0                 # restore return address
+                stw r4,SELECTORS-SELBASES(r5)
+                lwz esb,esbase(state)   # keep shadow state in sync
+                lwz ssb,ssbase(state)
+                blr
+
+
+/* Instructions that may modify the current code segment: the next optimization
+ * might be to avoid calling C code when the code segment does not change. But
+ * it's probably not worth the effort.
+ */ +/* Far calls, jumps and returns */ +lcall_w: NEXTWORD(r4) + NEXTWORD(r5) + li r3,code_lcallw + b complex + +lcall_l: NEXTDWORD(r4) + NEXTWORD(r5) + li r3,code_lcalll + b complex + +lcallw: lhbrx r4,MEM + addi offset,offset,2 + lhbrx r5,MEM + li r3,code_lcallw + b complex + +lcalll: lwbrx r4,MEM + addi offset,offset,4 + lhbrx r5,MEM + li r3,code_lcalll + b complex + +ljmp_w: NEXTWORD(r4) + NEXTWORD(r5) + li r3,code_ljmpw + b complex + +ljmp_l: NEXTDWORD(r4) + NEXTWORD(r5) + li r3,code_ljmpl + b complex + +ljmpw: lhbrx r4,MEM + addi offset,offset,2 + lhbrx r5,MEM + li r3,code_ljmpw + b complex + +ljmpl: lwbrx r4,MEM + addi offset,offset,4 + lhbrx r5,MEM + li r3,code_ljmpl + b complex + +lretw_imm: NEXTWORD(r4) + b 1f +lretw: li r4,0 +1: li r3,code_lretw + b complex + +lretl_imm: NEXTWORD(r4) + b 1f +lretl: li r4,0 +1: li r3,code_lretl + b complex + +/* Interrupts */ +int: li r3,code_softint # handled by C code + NEXTBYTE(r4) + b complex + +int3: li r3,code_int3 # handled by C code + b complex + +into: EVAL_OF + bf+ OF,nop + li r3,code_into + b complex # handled by C code + +iretw: li r3,code_iretw # handled by C code + b complex + +iretl: li r3,code_iretl + b complex + +/* Miscellaneous flag control instructions */ +clc: oris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16 + xoris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16 + NEXT + +cmc: oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16 + xoris flags,flags,(CF_IN_CR|CF_COMPLEMENT|ABOVE_IN_CR)>>16 + NEXT + +stc: oris flags,flags,\ + (CF_IN_CR|CF_LOCATION|CF_COMPLEMENT|ABOVE_IN_CR)>>16 + xoris flags,flags,(CF_IN_CR|CF_LOCATION|ABOVE_IN_CR)>>16 + NEXT + +cld: crclr DF + NEXT + +std: crset DF + NEXT + +cli: crclr IF + NEXT + +sti: crset IF + NEXT + +lahf: bl _eval_flags + stb r3,AH(state) + NEXT + +sahf: andis. 
r3,flags,OF_EXPLICIT>>16 + lbz r0,AH(state) + beql+ _eval_of # save OF just in case + rlwinm op1,r0,31,0x08 # AF + rlwinm flags,flags,0,OF_STATE_MASK + extsb result,r0 # SF/PF + ZF862ZF(r0) + oris flags,flags,(ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16 + addi op2,op1,0 # AF + ori result,result,0x00fb # set all except PF + mtcrf 0x02,r0 # SF/ZF + rlwimi flags,r0,27,CF_VALUE # CF + xori result,result,0x00ff # 00 if PF set, 04 if clear + NEXT + +pushfw_sp: bl _eval_flags + li r4,SP + lhbrx r5,r4,state + addi r5,r5,-2 + sthbrx r5,r4,state + clrlwi r5,r5,16 + sthbrx r3,ssb,r5 + NEXT + +pushfl_sp: bl _eval_flags + li r4,SP + lhbrx r5,r4,state + addi r5,r5,-4 + sthbrx r5,r4,state + clrlwi r5,r5,16 + stwbrx r3,ssb,r5 + NEXT + +popfl_sp: li r4,SP + lhbrx r5,r4,state + lwbrx r3,ssb,r5 + addi r5,r5,4 + stw r3,eflags(state) + sthbrx r5,r4,state + b 1f + +popfw_sp: li r4,SP + lhbrx r5,r4,state + lhbrx r3,ssb,r5 + addi r5,r5,2 + sth r3,eflags+2(state) + sthbrx r5,r4,state +1: rlwinm op1,r3,31,0x08 # AF + xori result,r3,4 # PF + ZF862ZF(r3) # cr6 + lis flags,(OF_EXPLICIT|ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16 + addi op2,op1,0 # AF + rlwinm result,result,0,0x04 # PF + rlwimi flags,r3,27,CF_VALUE # CF + mtcrf 0x6,r3 # IF,DF,TF,SF,ZF + rlwimi result,r3,24,0,0 # SF + rlwimi flags,r3,15,OF_VALUE # OF + NEXT + +/* SETcc is slightly faster for setz/setnz */ +setz: EVAL_ZF + bt ZF,1f +0: cmpwi opreg,0 + bne- ud + stbx opreg,MEM + NEXT + +setnz: EVAL_ZF + bt ZF,0b +1: cmpwi opreg,0 + bne- ud + stbx one,MEM + NEXT + +#define SETCC(cond, eval, flag) \ +set##cond: EVAL_##eval; bt flag,1b; b 0b; \ +setn##cond: EVAL_##eval; bt flag,0b; b 1b + + SETCC(c, CF, CF) + SETCC(a, ABOVE, ABOVE) + SETCC(s, SF, SF) + SETCC(g, SIGNED, SGT) + SETCC(l, SIGNED, SLT) + SETCC(o, OF, OF) + SETCC(p, PF, PF) + +/* No wait for a 486SX */ + .equ wait, nop + +/* ARPL is not recognized in real mode */ + .equ arpl, ud + +/* clts and in general control and debug registers are not implemented */ + .equ clts, unimpl + +aaa: lhbrx 
r0,AX,state
+                bl _eval_af
+                rlwinm r3,r3,0,0x10
+                SET_FLAGS(FLAGS_ADD(W))
+                rlwimi r3,r0,0,0x0f     # AF:AL&0x0f
+                li r4,0x106
+                addi r3,r3,-10
+                srwi r3,r3,16           # carry ? 0 : 0xffff
+                andc op1,r4,r3          # carry ? 0x106 : 0
+                add result,r0,op1
+                rlwinm result,result,0,28,23    # clear high half of AL
+                li op2,10               # sets AF indirectly
+                sthbrx result,AX,state  # store adjusted AX, not the r3 mask
+                                        # OF/SF/ZF/PF undefined !
+                rlwimi result,op1,8,0x10000     # insert CF
+                NEXT
+
+/* aas: as aaa, but 0x106 is subtracted from AX when AF is set or the low
+ * nibble of AL is above 9. */
+aas:            lhbrx r0,AX,state
+                bl _eval_af
+                rlwinm r3,r3,0,0x10
+                SET_FLAGS(FLAGS_ADD(W))
+                rlwimi r3,r0,0,0x0f     # AF:AL&0x0f
+                li r4,0x106
+                addi r3,r3,-10
+                srwi r3,r3,16           # carry ? 0 : 0xffff
+                andc op1,r4,r3          # carry ? 0x106 : 0
+                sub result,r0,op1
+                rlwinm result,result,0,28,23    # clear high half of AL
+                li op2,10               # sets AF indirectly
+                sthbrx result,AX,state  # store adjusted AX, not the r3 mask
+                                        # OF/SF/ZF/PF undefined !
+                rlwimi result,op1,8,0x10000     # insert CF
+                NEXT
+
+/* daa: add 6 to AL when AF is set or the low nibble is above 9, then add
+ * 0x60 when the intermediate value (with CF in bit 8) is above 0x9f.
+ * Both amounts are extracted from the sign bits of a reversed subtract,
+ * so each subtract must go negative exactly when the adjustment applies. */
+daa:            lbz r0,AL(state)
+                bl _eval_af
+                rlwinm r7,r3,0,0x10
+                bl _eval_cf             # r3=CF<<8
+                rlwimi r7,r0,0,0x0f     # AF:AL&0x0f
+                SET_FLAGS(FLAGS_ADD(B))
+                subfic r4,r7,9          # negative if AF or low nibble >9
+                rlwinm r4,r4,3,0x06     # 6 if AF or >9, 0 otherwise
+                srwi op1,r7,1           # 0..4, no AF, 5..f AF set
+                add r0,r0,r4            # conditional add
+                li op2,11               # sets AF depending on op1
+                or r0,r0,r3
+                subfic r3,r0,159        # negative if above 0x9f
+                rlwinm r3,r3,7,0x60     # mask value to add
+                add result,r0,r3        # final result for SF/ZF/PF
+                stb result,AL(state)
+                rlwimi result,r3,2,0x100        # set CF if added
+                NEXT
+
+/* das: subtract 6 from AL when AF is set or the low nibble is above 9,
+ * then subtract 0x60 when CF was set or the original AL was above 0x99.
+ * The second test uses the value of AL from before the first adjustment,
+ * which is why it is computed into r5 before r0 is modified. */
+das:            lbz r0,AL(state)
+                bl _eval_af
+                rlwinm r7,r3,0,0x10
+                bl _eval_cf             # same call as in daa: r3=CF<<8
+                rlwimi r7,r0,0,0x0f     # AF:AL&0x0f
+                SET_FLAGS(FLAGS_ADD(B))
+                subfic r4,r7,9          # negative if AF or low nibble >9
+                or r5,r0,r3             # CF:original AL
+                rlwinm r4,r4,3,0x06     # 6 if AF or >9, 0 otherwise
+                srwi op1,r7,1           # 0..4, no AF, 5..f AF set
+                subfic r5,r5,0x99       # negative if CF or original AL >0x99
+                sub r0,r0,r4            # conditional subtract
+                li op2,11               # sets AF depending on op1
+                or r4,r0,r3             # insert CF
+                rlwinm r3,r5,7,0x60     # mask value to subtract
+                sub result,r4,r3        # final result for SF/ZF/PF
+                stb result,AL(state)
+                rlwimi result,r3,2,0x100        # set CF
+                NEXT
+
+/* 486 specific instructions */
+
+/* For cmpxchg, only the zero flag is important */
+
+cmpxchgb:       lbz op1,AL(state)
+                SET_FLAGS(FLAGS_SUB(B)|ZF_IN_CR)
+                lbzx op2,MEM
+
cmpw cr6,op1,op2 + sub result,op1,op2 + bne cr6,1f + lbzx r3,REG # success: swap + stbx r3,MEM + NEXT +1: stb op2,AL(state) + NEXT + +cmpxchgw: lhbrx op1,AX,state + SET_FLAGS(FLAGS_SUB(W)|ZF_IN_CR) + lhbrx op2,MEM + cmpw cr6,op1,op2 + sub result,op1,op2 + bne cr6,1f + lhzx r3,REG # success: swap + sthx r3,MEM + NEXT +1: sthbrx op2,AX,state + NEXT + +cmpxchgl: lwbrx op1,EAX,state + SET_FLAGS(FLAGS_SUB(L)|ZF_IN_CR|SIGNED_IN_CR) + lwbrx op2,MEM + cmpw cr6,op1,op2 + sub result,op1,op2 + bne cr6,1f + lwzx r3,REG # success: swap + stwx r3,MEM + NEXT +1: stwbrx op2,EAX,state + NEXT + +xaddb: lbzx op2,MEM + SET_FLAGS(FLAGS_ADD(B)) + lbzx op1,REG + add result,op1,op2 + stbx result,MEM + stbx op2,REG + NEXT + +xaddw: lhbrx op2,MEM + SET_FLAGS(FLAGS_ADD(W)) + lhbrx op1,REG + add result,op1,op2 + sthbrx result,MEM + sthbrx op2,REG + NEXT + +xaddl: lwbrx op2,MEM + SET_FLAGS(FLAGS_ADD(L)) + lwbrx op1,REG + add result,op1,op2 + stwbrx result,MEM + stwbrx op2,REG + NEXT + +/* All FPU instructions skipped. This is a 486 SX ! 
*/ +esc: li r3,code_dna # DNA interrupt + b complex + + .equ hlt, unimpl # Cannot stop + + .equ invd, unimpl + +/* Undefined in real address mode */ + .equ lar, ud + + .equ lgdt, unimpl + .equ lidt, unimpl + .equ lldt, ud + .equ lmsw, unimpl + +/* protected mode only */ + .equ lsl, ud + .equ ltr, ud + + .equ movl_cr_reg, unimpl + .equ movl_reg_cr, unimpl + .equ movl_dr_reg, unimpl + .equ movl_reg_dr, unimpl + + .equ sgdt, unimpl + + .equ sidt, unimpl + .equ sldt, ud + .equ smsw, unimpl + + .equ str, ud + +ud: li r3,code_ud + li r4,0 + b complex + +unimpl: li r3,code_ud + li r4,1 + b complex + + .equ verr, ud + .equ verw, ud + .equ wbinvd, unimpl + +em86_end: + .size em86_enter,em86_end-em86_enter +#ifdef __BOOT__ + .data +#define ENTRY(x,t) .long x+t-_jtables +#else + .section .rodata +#define ENTRY(x,t) .long x+t +#endif + +#define BOP(x) ENTRY(x,2) /* Byte operation with mod/rm byte */ +#define WLOP(x) ENTRY(x,3) /* 16 or 32 bit operation with mod/rm byte */ +#define EXTOP(x) ENTRY(x,0) /* Opcode with extension in mod/rm byte */ +#define OP(x) ENTRY(x,1) /* Direct one byte opcode/prefix */ + +/* A few macros for the main table */ +#define gen6(op, wl, axeax) \ + BOP(op##b##_reg_mem); WLOP(op##wl##_reg_mem); \ + BOP(op##b##_mem_reg); WLOP(op##wl##_mem_reg); \ + OP(op##b##_imm_al); OP(op##wl##_imm_##axeax) + +#define rep7(l,t) \ + ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); \ + ENTRY(l,t); ENTRY(l,t); ENTRY(l,t) + +#define rep8(l) l ; l; l; l; l; l; l; l; + +#define allcond(pfx, sfx, t) \ + ENTRY(pfx##o##sfx, t); ENTRY(pfx##no##sfx, t); \ + ENTRY(pfx##c##sfx, t); ENTRY(pfx##nc##sfx, t); \ + ENTRY(pfx##z##sfx, t); ENTRY(pfx##nz##sfx, t); \ + ENTRY(pfx##na##sfx, t); ENTRY(pfx##a##sfx, t); \ + ENTRY(pfx##s##sfx, t); ENTRY(pfx##ns##sfx, t); \ + ENTRY(pfx##p##sfx, t); ENTRY(pfx##np##sfx, t); \ + ENTRY(pfx##l##sfx, t); ENTRY(pfx##nl##sfx, t); \ + ENTRY(pfx##ng##sfx, t); ENTRY(pfx##g##sfx, t) + +/* single/double register sign extensions and other oddities */ +#define 
h2sextw cbw /* Half to Single sign extension */ +#define s2dextw cwd /* Single to Double sign extension */ +#define h2sextl cwde +#define s2dextl cdq +#define j_a16_cxz_w jcxz_w +#define j_a32_cxz_w jecxz_w +#define j_a16_cxz_l jcxz_l +#define j_a32_cxz_l jecxz_l +#define loopa16_w loopw_w +#define loopa16_l loopw_l +#define loopa32_w loopl_w +#define loopa32_l loopl_l +#define loopnza16_w loopnzw_w +#define loopnza16_l loopnzw_l +#define loopnza32_w loopnzl_w +#define loopnza32_l loopnzl_l +#define loopza16_w loopzw_w +#define loopza16_l loopzw_l +#define loopza32_w loopzl_w +#define loopza32_l loopzl_l +/* No FP support */ + +/* Addressing mode table */ + .align 5 +# (%bx,%si), (%bx,%di), (%bp,%si), (%bp,%di) +adtable: .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 +# (%si), (%di), o16, (%bx) + .long 0x00004600, 0x00004700, 0x00002000, 0x00004300 +# o8(%bx,%si), o8(%bx,%di), o8(%bp,%si), o8(%bp,%di) + .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 +# o8(%si), o8(%di), o8(%bp), o8(%bx) + .long 0x00004600, 0x00004700, 0x80004500, 0x00004300 +# o16(%bx,%si), o16(%bx,%di), o16(%bp,%si), o16(%bp,%di) + .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 +# o16(%si), o16(%di), o16(%bp), o16(%bx) + .long 0x00004600, 0x00004700, 0x80004500, 0x00004300 +# register addressing modes do not use the table + .long 0, 0, 0, 0, 0, 0, 0, 0 +#now 32 bit modes +# (%eax), (%ecx), (%edx), (%ebx) + .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 +# sib, o32, (%esi), (%edi) + .long 0x00003090, 0x00002090, 0x00004690, 0x00004790 +# o8(%eax), o8(%ecx), o8(%edx), o8(%ebx) + .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 +# sib, o8(%ebp), o8(%esi), o8(%edi) + .long 0x00003090, 0x80004590, 0x00004690, 0x00004790 +# o32(%eax), o32(%ecx), o32(%edx), o32(%ebx) + .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 +# sib, o32(%ebp), o32(%esi), o32(%edi) + .long 0x00003090, 0x80004590, 0x00004690, 0x00004790 +# register addressing modes do not use the table + .long 0, 0, 
0, 0, 0, 0, 0, 0 + +#define jtable(wl, awl, spesp, axeax, name ) \ + .align 5; \ +jtab_##name: gen6(add, wl, axeax); \ + OP(push##wl##_##spesp##_sr); \ + OP(pop##wl##_##spesp##_sr); \ + gen6(or, wl, axeax); \ + OP(push##wl##_##spesp##_sr); \ + OP(_twobytes); \ + gen6(adc, wl, axeax); \ + OP(push##wl##_##spesp##_sr); \ + OP(pop##wl##_##spesp##_sr); \ + gen6(sbb, wl, axeax); \ + OP(push##wl##_##spesp##_sr); \ + OP(pop##wl##_##spesp##_sr); \ + gen6(and, wl, axeax); OP(_es); OP(daa); \ + gen6(sub, wl, axeax); OP(_cs); OP(das); \ + gen6(xor, wl, axeax); OP(_ss); OP(aaa); \ + gen6(cmp, wl, axeax); OP(_ds); OP(aas); \ + rep8(OP(inc##wl##_reg)); \ + rep8(OP(dec##wl##_reg)); \ + rep8(OP(push##wl##_##spesp##_reg)); \ + rep8(OP(pop##wl##_##spesp##_reg)); \ + OP(pusha##wl##_##spesp); OP(popa##wl##_##spesp); \ + WLOP(bound##wl); WLOP(arpl); \ + OP(_fs); OP(_gs); OP(_opsize); OP(_adsize); \ + OP(push##wl##_##spesp##_imm); WLOP(imul##wl##_imm); \ + OP(push##wl##_##spesp##_imm8); WLOP(imul##wl##_imm8); \ + OP(insb_##awl); OP(ins##wl##_##awl); \ + OP(outsb_##awl); OP(outs##wl##_##awl); \ + allcond(sj,_##wl,1); \ + EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm); \ + EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm8); \ + BOP(testb_reg_mem); WLOP(test##wl##_reg_mem); \ + BOP(xchgb_reg_mem); WLOP(xchg##wl##_reg_mem); \ + BOP(movb_reg_mem); WLOP(mov##wl##_reg_mem); \ + BOP(movb_mem_reg); WLOP(mov##wl##_mem_reg); \ + WLOP(mov##wl##_sr_mem); WLOP(lea##wl); \ + WLOP(mov##wl##_mem_sr); WLOP(pop##wl##_##spesp##_##awl); \ + OP(nop); rep7(xchg##wl##_##axeax##_reg,1); \ + OP(h2sext##wl); OP(s2dext##wl); \ + OP(lcall_##wl); OP(wait); \ + OP(pushf##wl##_##spesp); OP(popf##wl##_##spesp); \ + OP(sahf); OP(lahf); \ + OP(movb_##awl##_al); OP(mov##wl##_##awl##_##axeax); \ + OP(movb_al_##awl); OP(mov##wl##_##axeax##_##awl); \ + OP(movsb_##awl); OP(movs##wl##_##awl); \ + OP(cmpsb_##awl); OP(cmps##wl##_##awl); \ + OP(testb_imm_al); OP(test##wl##_imm_##axeax); \ + OP(stosb_##awl); OP(stos##wl##_##awl); \ + 
OP(lodsb_##awl); OP(lods##wl##_##awl); \ + OP(scasb_##awl); OP(scas##wl##_##awl); \ + rep8(OP(movb_imm_reg)); \ + rep8(OP(mov##wl##_imm_reg)); \ + EXTOP(shiftb_imm); EXTOP(shift##wl##_imm); \ + OP(ret##wl##_##spesp##_imm); OP(ret##wl##_##spesp); \ + WLOP(ldlptr##wl); WLOP(ldlptr##wl); \ + BOP(movb_imm_mem); WLOP(mov##wl##_imm_mem); \ + OP(enter##wl##_##spesp); OP(leave##wl##_##spesp); \ + OP(lret##wl##_imm); OP(lret##wl); \ + OP(int3); OP(int); OP(into); OP(iret##wl); \ + EXTOP(shiftb_1); EXTOP(shift##wl##_1); \ + EXTOP(shiftb_cl); EXTOP(shift##wl##_cl); \ + OP(aam); OP(aad); OP(ud); OP(xlatb_##awl); \ + rep8(OP(esc)); \ + OP(loopnz##awl##_##wl); OP(loopz##awl##_##wl); \ + OP(loop##awl##_##wl); OP(j_##awl##_cxz_##wl); \ + OP(inb_port_al); OP(in##wl##_port_##axeax); \ + OP(outb_al_port); OP(out##wl##_##axeax##_port); \ + OP(call##wl##_##spesp); OP(jmp_##wl); \ + OP(ljmp_##wl); OP(sjmp_##wl); \ + OP(inb_dx_al); OP(in##wl##_dx_##axeax); \ + OP(outb_al_dx); OP(out##wl##_##axeax##_dx); \ + OP(_lock); OP(ud); OP(_repnz); OP(_repz); \ + OP(hlt); OP(cmc); \ + EXTOP(grp3b); EXTOP(grp3##wl); \ + OP(clc); OP(stc); OP(cli); OP(sti); \ + OP(cld); OP(std); \ + EXTOP(grp4b); EXTOP(grp5##wl##_##spesp); \ + /* Here we start the table for twobyte instructions */ \ + OP(ud); OP(ud); WLOP(lar); WLOP(lsl); \ + OP(ud); OP(ud); OP(clts); OP(ud); \ + OP(invd); OP(wbinvd); OP(ud); OP(ud); \ + OP(ud); OP(ud); OP(ud); OP(ud); \ + rep8(OP(ud)); \ + rep8(OP(ud)); \ + OP(movl_cr_reg); OP(movl_reg_cr); \ + OP(movl_dr_reg); OP(movl_reg_dr); \ + OP(ud); OP(ud); OP(ud); OP(ud); \ + rep8(OP(ud)); \ + /* .long wrmsr, rdtsc, rdmsr, rdpmc; */\ + rep8(OP(ud)); \ + rep8(OP(ud)); \ + /* allcond(cmov, wl); */ \ + rep8(OP(ud)); rep8(OP(ud)); \ + rep8(OP(ud)); rep8(OP(ud)); \ + /* MMX Start */ \ + rep8(OP(ud)); rep8(OP(ud)); \ + rep8(OP(ud)); rep8(OP(ud)); \ + /* MMX End */ \ + allcond(j,_##wl, 1); \ + allcond(set,,2); \ + OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \ + OP(ud) /* cpuid */; 
WLOP(bt##wl##_reg_mem); \ + WLOP(shld##wl##_imm); WLOP(shld##wl##_cl); \ + OP(ud); OP(ud); \ + OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \ + OP(ud) /* rsm */; WLOP(bts##wl##_reg_mem); \ + WLOP(shrd##wl##_imm); WLOP(shrd##wl##_cl); \ + OP(ud); WLOP(imul##wl##_mem_reg); \ + BOP(cmpxchgb); WLOP(cmpxchg##wl); \ + WLOP(ldlptr##wl); WLOP(btr##wl##_reg_mem); \ + WLOP(ldlptr##wl); WLOP(ldlptr##wl); \ + WLOP(movzb##wl); WLOP(movzw##wl); \ + OP(ud); OP(ud); \ + EXTOP(grp8##wl); WLOP(btc##wl##_reg_mem); \ + WLOP(bsf##wl); WLOP(bsr##wl); \ + WLOP(movsb##wl); WLOP(movsw##wl); \ + BOP(xaddb); WLOP(xadd##wl); \ + OP(ud); OP(ud); \ + OP(ud); OP(ud); OP(ud); OP(ud); \ + rep8(OP(bswap)); \ + /* MMX Start */ \ + rep8(OP(ud)); rep8(OP(ud)); \ + rep8(OP(ud)); rep8(OP(ud)); \ + rep8(OP(ud)); rep8(OP(ud)); \ + /* MMX End */ + .align 5 /* 8kb of tables, 32 byte aligned */ +_jtables: jtable(w, a16, sp, ax, www) /* data16, addr16 */ + jtable(l, a16, sp, eax, lww) /* data32, addr16 */ + jtable(w, a32, sp, ax, wlw) /* data16, addr32 */ + jtable(l, a32, sp, eax, llw) /* data32, addr32 */ +/* The other possible combinations are only required by protected mode +code using a big stack segment */ +/* Here are the auxiliary tables for opcode extensions, note that +all entries get 2 or 3 added. 
*/
+/* grp1table(bwl,t,s8) emits the label grp1<bwl>_imm<s8> followed by eight
+ * table words, one per mod/rm extension value, in the order
+ * add, or, adc, sbb, and, sub, xor, cmp. */
+#define grp1table(bwl,t,s8) \
+grp1##bwl##_imm##s8:; \
+        ENTRY(add##bwl##_imm##s8,t); ENTRY(or##bwl##_imm##s8,t); \
+        ENTRY(adc##bwl##_imm##s8,t); ENTRY(sbb##bwl##_imm##s8,t); \
+        ENTRY(and##bwl##_imm##s8,t); ENTRY(sub##bwl##_imm##s8,t); \
+        ENTRY(xor##bwl##_imm##s8,t); ENTRY(cmp##bwl##_imm##s8,t)
+
+        grp1table(b,2,)
+        grp1table(w,3,)
+        grp1table(w,3,8)
+        grp1table(l,3,)
+        grp1table(l,3,8)
+
+/* shifttable(bwl,t,c) emits the label shift<bwl>_<c> and eight entries in
+ * the order rol, ror, rcl, rcr, shl, shr, ud, sar; c selects the count
+ * source (1, cl or imm). */
+#define shifttable(bwl,t,c) \
+shift##bwl##_##c:; \
+        ENTRY(rol##bwl##_##c,t); ENTRY(ror##bwl##_##c,t); \
+        ENTRY(rcl##bwl##_##c,t); ENTRY(rcr##bwl##_##c,t); \
+        ENTRY(shl##bwl##_##c,t); ENTRY(shr##bwl##_##c,t); \
+        OP(ud); ENTRY(sar##bwl##_##c,t)
+
+        shifttable(b,2,1)
+        shifttable(w,3,1)
+        shifttable(l,3,1)
+
+        shifttable(b,2,cl)
+        shifttable(w,3,cl)
+        shifttable(l,3,cl)
+
+        shifttable(b,2,imm)
+        shifttable(w,3,imm)
+        shifttable(l,3,imm)
+
+/* grp3table(bwl,t): test imm, ud, not, neg, mul, imul, div, idiv. */
+#define grp3table(bwl,t) \
+grp3##bwl:      ENTRY(test##bwl##_imm,t); OP(ud); \
+        ENTRY(not##bwl,t); ENTRY(neg##bwl,t); \
+        ENTRY(mul##bwl,t); ENTRY(imul##bwl,t); \
+        ENTRY(div##bwl,t); ENTRY(idiv##bwl,t)
+
+        grp3table(b,2)
+        grp3table(w,3)
+        grp3table(l,3)
+
+
+/* grp4b: only incb and decb are defined, the other six slots are ud. */
+grp4b:          BOP(incb); BOP(decb); \
+        OP(ud); OP(ud); \
+        OP(ud); OP(ud); \
+        OP(ud); OP(ud)
+
+/* grp5table(wl,spesp): inc, dec, call, lcall, jmp, ljmp, push, ud.
+ * The lcall entry is written lcall##wl with no trailing ##: the paste
+ * operator needs a token on both sides and must not be followed by the
+ * closing parenthesis. */
+#define grp5table(wl,spesp) \
+grp5##wl##_##spesp: \
+        WLOP(inc##wl); WLOP(dec##wl); \
+        WLOP(call##wl##_##spesp##_mem); WLOP(lcall##wl); \
+        WLOP(jmp##wl); WLOP(ljmp##wl); \
+        WLOP(push##wl##_##spesp); OP(ud)
+
+        grp5table(w,sp)
+        grp5table(l,sp)
+
+/* grp8table(wl): the first four slots are ud, then bt, bts, btr, btc with
+ * an immediate operand. */
+#define grp8table(wl) \
+grp8##wl:       OP(ud); OP(ud); OP(ud); OP(ud); \
+        WLOP(bt##wl##_imm); WLOP(bts##wl##_imm); \
+        WLOP(btr##wl##_imm); WLOP(btc##wl##_imm)
+
+        grp8table(w)
+        grp8table(l)
+#ifdef __BOOT__
+_endjtables:    .long 0 /* Points to _jtables after relocation */
+#endif
+
-- cgit v1.2.3