diff options
Diffstat (limited to 'c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S')
-rw-r--r-- | c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S | 4553 |
1 files changed, 0 insertions, 4553 deletions
diff --git a/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S b/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S deleted file mode 100644 index e640541fe2..0000000000 --- a/c/src/lib/libbsp/powerpc/shared/bootloader/em86real.S +++ /dev/null @@ -1,4553 +0,0 @@ -/* - * em86real.S - * - * Copyright (C) 1998, 1999 Gabriel Paubert, paubert@iram.es - * - * Modified to compile in RTEMS development environment - * by Eric Valette - * - * Copyright (C) 1999 Eric Valette. valette@crf.canon.fr - * - * The license and distribution terms for this file may be - * found in the file LICENSE in this distribution or at - * http://www.rtems.org/license/LICENSE. - */ - -/* If the symbol __BOOT__ is defined, a slightly different version is - * generated to be compiled with the -m relocatable option - */ - -#ifdef __BOOT__ -#include "bootldr.h" -/* It is impossible to gather statistics in the boot version */ -#undef EIP_STATS -#endif - -/* - * - * Given the size of this code, it deserves a few comments on how it works, - * and why it was implemented the way it is. - * - * The goal is to have a real mode i486SX emulator to initialize hardware, - * mostly graphics boards, by interpreting ROM BIOSes. The choice of a 486SX - * is logical since this is the lowest processor that PCI ROM BIOSes must run - * on. - * - * The goal of this emulator is not performance, but a small enough memory - * footprint to include it in a bootloader. - * - * It is actually likely to be comparable to a 25MHz 386DX on a 200MHz 603e ! - * This is not as serious as it seems since most of the BIOS code performs - * a lot of accesses to I/O and non-cacheable memory spaces. For such - * instructions, the execution time is often dominated by bus accesses. - * Statistics of the code also shows that it spends a large function of - * the time in loops waiting for vertical retrace or programs one of the - * timers and waits for the count to go down to zero. 
This type of loop - * runs emulated at the same speed as on 5 GHz Pentium IV++ ;) - * - */ - -/* - * Known bugs or differences with a real 486SX (real mode): - * - segment limits are not enforced (too costly) - * - xchg instructions with memory are not locked - * - lock prefixes are not implemented at all - * - long divides implemented but perhaps still buggy - * - miscellaneous system instructions not implemented - * (some probably cannot be implemented) - * - neither control nor debug registers are implemented for the time being - * (debug registers are impossible to implement at a reasonable cost) - */ - -/* Code options, put them on the compiler command line */ -/* #define EIP_STATS */ /* EIP based profiling */ -/* #undef EIP_STATS */ - -/* - * Implementation notes: - * - * A) flags emulation. - * - * The most important decisions when it comes to obtain a reasonable speed - * are related to how the EFLAGS register is emulated. - * - * Note: the code to set up flags is complex, but it is only seldom - * executed since cmp and test instructions use much faster flag evaluation - * paths. For example the overflow flag is almost only needed for pushf and - * int. Comparison results only involve (SF^OF) or (SF^OF)+ZF and the - * implementation is fast in this case. - * - * Rarely used flags: AC, NT and IOPL are kept in a memory EFLAGS image. - * All other flags are either kept explicitly in PPC cr (DF, IF, and TF) or - * lazily evaluated from the state of 4 registers called flags, result, op1, - * op2, and sometimes the cr itself. The emulation has been designed for - * minimal overhead for the common case where the flags are never used. With - * few exceptions, all instructions that set flags leave the result of the - * computation in a register called result, and operands are taken from op1 - * and op2 registers. 
However a few instructions like cmp, test and bit tests - * (bt/btc/btr/bts/bsf/bsr) explicitly set cr bits to short circuit - * condition code evaluation of conditional instructions. - * - * As a very brief summary: - * - * - the result of the last flag setting operation is often either in the - * result register or in op2 after increment or decrement instructions - * because result and op1 may be needed to compute the carry. - * - * - compare instruction leave the result of the unsigned comparison - * in cr4 and of signed comparison in cr6. This means that: - * - cr4[0]=CF (short circuit for jc/jnc) - * - cr4[1]=~(CF+ZF) (short circuit for ja/jna) - * - cr6[0]=(OF^SF) (short circuit for jl/jnl) - * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng) - * - cr6[2]=ZF (short circuit for jz/jnz) - * - * - test instruction set flags in cr6 and clear overflow. This means that: - * - cr6[0]=SF=(SF^OF) (short circuit for jl/jnl/js/jns) - * - cr6[1]=~((SF^OF)+ZF) (short circuit for jg/jng) - * - cr6[2]=ZF (short circuit for jz/jnz) - * - * All flags may be lazily evaluated from several values kept in registers: - * - * Flag: Depends upon: - * OF result, op1, op2, flags[INCDEC_FIELD,SUBTRACTING,OF_STATE_MASK] - * SF result, op2, flags[INCDEC_FIELD,RES_SIZE] - * ZF result, op2, cr6[2], flags[INCDEC_FIELD,RES_SIZE,ZF_PROTECT] - * AF op1, op2, flags[INCDEC_FIELD,SUBTRACTING,CF_IN] - * PF result, op2, flags[INCDEC_FIELD] - * CF result, op1, flags[CF_STATE_MASK, CF_IN] - * - * The order of the fields in the flags register has been chosen so that a - * single rlwimi is necessary for common instruction that do not affect all - * flags. (See the code for inc/dec emulation). - * - * - * B) opcodes and prefixes. - * - * The register called opcode holds in its low order 8 bits the opcode - * (second byte if the first byte is 0x0f). More precisely it holds the - * last byte fetched before the modrm byte or the immediate operand(s) - * of the instruction, if any. 
High order 24 bits are zero unless the - * instruction has prefixes. These higher order bits have the following - * meaning: - * 0x80000000 segment override prefix - * 0x00001000 repnz prefix (0xf2) - * 0x00000800 repz prefix (0xf3) - * 0x00000400 address size prefix (0x67) - * 0x00000200 operand size prefix (0x66) - * (bit 0x1000 and 0x800 cannot be set simultaneously) - * - * Therefore if there is a segment override the value will be very - * negative (between 0x80000000 and 0x800016ff), if there is no segment - * override, the value will be between 0 and 0x16ff. The reason for - * this choice will be understood in the next part. - * - * C) addressing mode description tables. - * - * the encoding of the modrm bytes (especially in 16 bit mode) is quite - * complex. Hence a table, indexed by the five useful bits of the modrm - * byte is used to simplify decoding. Here is a description: - * - * bit mask meaning - * 0x80000000 use ss as default segment register - * 0x00004000 means that this addressing mode needs a base register - * (set for all entries except sib and displacement-only) - * 0x00002000 set if preceding is not set - * 0x00001000 set if an sib follows - * 0x00000700 base register to use (16 and 32 bit) - * 0x00000080 set in 32 bit addressing mode table, cleared in 16 bit - * (so extsb mask,entry; ori mask,mask,0xffff gives a mask) - * 0x00000070 kludge field, possible values are - * 0: 16 bit addressing mode without index - * 10: 32 bit addressing mode - * 60: 16 bit addressing mode with %si as index - * 70: 16 bit addressing mode with %di as index - * - * This convention leads to the following special values used to check for - * sib present and displacement-only, which happen to be the three lowest - * values in the table (unsigned): - * 0x00003090 sib follows (implies it is a 32 bit mode) - * 0x00002090 32 bit displacement-only - * 0x00002000 16 bit displacement-only - * - * This means that all entries are either very negative in the 0x80002000 - 
* range if the segment defaults to ss or higher than 0x2000 if it defaults - * to ds. Combined with the value in opcode this gives the following table: - * opcode entry entry>opcode ? segment to use - * positive positive yes ds (default) - * negative positive yes overriden by prefix - * positive negative no ss - * negative negative yes overridden by prefix - * - * Hence a simple comparison allows to check for the need to override - * the current base with ss, i.e., when ss is the default base and the - * instruction has no override prefix. - * - * D) BUGS - * - * This software is obviously bug-free :-). Nevertheless, if you encounter - * an interesting feature. Mail me a note, if possible with a detailed - * instruction example showing where and how it fails. - * - */ - -/* Now the details of flag evaluation with the necessary macros */ - -/* Alignment check is toggable so the system believes it is a 486, but -CPUID is not to avoid unnecessary complexities. However, alignment -is actually never checked (real mode is CPL 0 anyway). */ -#define AC86 13 /* Can only be toggled */ -#define VM86 14 /* Not used for now */ -#define RF86 15 /* Not emulated precisely */ -/* Actually NT and IOPL are kept in memory */ -#define NT86 17 -#define IOPL86 18 /* Actually 18 and 19 */ -#define OF86 20 -#define DF86 21 -#define IF86 22 -#define TF86 23 -#define SF86 24 -#define ZF86 25 -#define AF86 27 -#define PF86 29 -#define CF86 31 - -/* Where the less important flags are placed in PPC cr */ -#define RF 20 /* Suppress trap flag: cr5[0] */ -#define DF 21 /* Direction flag: cr5[1] */ -#define IF 22 /* Interrupt flag: cr5[2] */ -#define TF 23 /* Single step flag: cr5[3] */ - -/* Now the flags which are frequently used */ -/* - * CF_IN is a copy of the input carry with PPC polarity, - * it is cleared for add, set for sub and cmp, - * equal to the x86 carry for adc and to its complement for sbb. - * it is used to evaluate AF and CF. 
- */ -#define CF_IN 0x80000000 - -/* #define GET_CF_IN(dst) rlwinm dst,flags,1,0x01 */ - -/* CF_IN_CR set in flags means that cr4[0] is a copy of carry bit */ -#define CF_IN_CR 0x40000000 - -#define EVAL_CF andis. r3,flags,(CF_IN_CR)>>16; beql- _eval_cf - -/* - * CF_STATE tells how to compute the carry bit. - * NOTRESULT16 and NOTRESULT8 are never set explicitly, - * but they may happen after a cmc instruction. - */ -#define CF 16 /* cr4[0] */ -#define CF_LOCATION 0x30000000 -#define CF_ZERO 0x00000000 -#define CF_EXPLICIT 0x00000000 -#define CF_COMPLEMENT 0x08000000 /* Indeed a polarity bit */ -#define CF_STATE_MASK (CF_LOCATION|CF_COMPLEMENT) -#define CF_VALUE 0x08000000 -#define CF_SET 0x08000000 -#define CF_RES32 0x10000000 -#define CF_NOTRES32 0x18000000 -#define CF_RES16 0x20000000 -#define CF_NOTRES16 0x28000000 -#define CF_RES8 0x30000000 -#define CF_NOTRES8 0x38000000 - -#define CF_ADDL CF_RES32 -#define CF_SUBL CF_NOTRES32 -#define CF_ADDW CF_RES16 -#define CF_SUBW CF_RES16 -#define CF_ADDB CF_RES8 -#define CF_SUBB CF_RES8 - -#define CF_ROTCNT(dst) rlwinm dst,flags,7,0x18 -#define CF_POL(dst,pos) rlwinm dst,flags,(36-pos)%32,pos,pos -#define CF_POL_INSERT(dst,pos) \ - rlwimi dst,flags,(36-pos)%32,pos,pos -#define RES2CF(dst) rlwinm dst,result,8,7,15 - -/* - * OF_STATE tells how to compute the overflow bit. When the low order bit - * is set (OF_EXPLICIT), it means that OF is the exclusive or of the - * two other bits. For the reason of this choice, see rotate instructions. - */ -#define OF 1 /* Only after EVAL_OF */ -#define OF_STATE_MASK 0x07000000 -#define OF_INCDEC 0x00000000 -#define OF_EXPLICIT 0x01000000 -#define OF_ZERO 0x01000000 -#define OF_VALUE 0x04000000 -#define OF_SET 0x04000000 -#define OF_ONE 0x05000000 -#define OF_XOR 0x06000000 -#define OF_ARITHL 0x06000000 -#define OF_ARITHW 0x02000000 -#define OF_ARITHB 0x04000000 - -#define EVAL_OF rlwinm. r3,flags,6,0,1; bngl+ _eval_of; andis. 
r3,flags,OF_VALUE>>16 - -/* See _eval_of to see how this can be used */ -#define OF_ROTCNT(dst) rlwinm dst,flags,10,0x1c - -/* - * SIGNED_IN_CR means that cr6 is set as after a signed compare: - * - cr6[0] is SF^OF for jl/jnl/setl/setnl... - * - cr6[1] is ~((SF^OF)+ZF) for jg/jng/setg/setng... - * - cr6[2] is ZF (ZF_IN_CR is always set if this bit is set) - */ -#define SLT 24 /* cr6[0], signed less than */ -#define SGT 25 /* cr6[1], signed greater than */ -#define SIGNED_IN_CR 0x00800000 - -#define EVAL_SIGNED andis. r3,flags,SIGNED_IN_CR>>16; beql- _eval_signed - -/* - * Above in CR means that cr4 is set as after an unsigned compare: - * - cr4[0] is CF (CF_IN_CR is also set) - * - cr4[1] is ~(CF+ZF) (ZF_IN_CR is also set) - */ -#define ABOVE 17 /* cr4[1] */ -#define ABOVE_IN_CR 0x00400000 - -#define EVAL_ABOVE andis. r3,flags,ABOVE_IN_CR>>16; beql- _eval_above - -/* SF_IN_CR means cr6[0] is a copy of SF. It implies ZF_IN_CR is also set */ -#define SF 24 /* cr6[0] */ -#define SF_IN_CR 0x00200000 - -#define EVAL_SF andis. r3,flags,SF_IN_CR>>16; beql- _eval_sf_zf - -/* ZF_IN_CR means cr6[2] is a copy of ZF. */ -#define ZF 26 -#define ZF_IN_CR 0x00100000 - -#define EVAL_ZF andis. r3,flags,ZF_IN_CR>>16; beql- _eval_sf_zf -#define ZF2ZF86(s,d) rlwimi d,s,ZF-ZF86,ZF86,ZF86 -#define ZF862ZF(reg) rlwimi reg,reg,32+ZF86-ZF,ZF,ZF - -/* - * ZF_PROTECT means cr6[2] is the only valid value for ZF. This is necessary - * because some infrequent instructions may leave SF and ZF in an apparently - * inconsistent state (both set): sahf, popf and the few (not implemented) - * instructions that only affect ZF. 
- */ -#define ZF_PROTECT 0x00080000 - -/* The parity is always evaluated when it is needed */ -#define PF 0 /* Only after EVAL_PF */ -#define EVAL_PF bl _eval_pf - -/* This field gives the shift amount to use to evaluate SF - and ZF when ZF_PROTECT is not set */ -#define RES_SIZE_MASK 0x00060000 -#define RESL 0x00000000 -#define RESW 0x00040000 -#define RESB 0x00060000 - -#define RES_SHIFT(dst) rlwinm dst,flags,18,0x18 - -/* SUBTRACTING is set if the last flag setting instruction was sub/sbb/cmp, - used to evaluate OF and AF */ -#define SUBTRACTING 0x00010000 - -#define GET_ADDSUB(dst) rlwinm dst,flags,16,0x01 - -/* rotate (rcl/rcr/rol/ror) affect CF and OF but not other flags */ -#define ROTATE_MASK (CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR|OF_STATE_MASK|SIGNED_IN_CR) -#define ROTATE_FLAGS rlwimi flags,one,24,ROTATE_MASK - -/* - * INCDEC_FIELD has at most one bit set when the last flag setting instruction - * was either inc or dec (which do not affect the carry). When one of these - * bits is set, it affects the way OF, SF, ZF, AF, and PF are evaluated. 
- */ -#define INCDEC_FIELD 0x0000ff00 - -#define DECB_SHIFT 8 -#define INCB_SHIFT 9 -#define DECW_SHIFT 10 -#define INCW_SHIFT 11 -#define DECL_SHIFT 14 -#define INCL_SHIFT 15 - -#define INCDEC_MASK (OF_STATE_MASK|SIGNED_IN_CR|ABOVE_IN_CR|SF_IN_CR|\ - ZF_IN_CR|ZF_PROTECT|RES_SIZE_MASK|SUBTRACTING|\ - INCDEC_FIELD) -/* Operations to perform to tell where the flags are after inc or dec */ -#define INC_FLAGS(BWL) rlwimi flags,one,INC##BWL##_SHIFT,INCDEC_MASK -#define DEC_FLAGS(BWL) rlwimi flags,one,DEC##BWL##_SHIFT,INCDEC_MASK - -/* How the flags are set after arithmetic operations */ -#define FLAGS_ADD(BWL) (CF_ADD##BWL|OF_ARITH##BWL|RES##BWL) -#define FLAGS_SBB(BWL) (CF_SUB##BWL|OF_ARITH##BWL|RES##BWL|SUBTRACTING) -#define FLAGS_SUB(BWL) FLAGS_SBB(BWL)|CF_IN -#define FLAGS_CMP(BWL) FLAGS_SUB(BWL)|ZF_IN_CR|CF_IN_CR|SIGNED_IN_CR|ABOVE_IN_CR - -/* How the flags are set after logical operations */ -#define FLAGS_LOG(BWL) (CF_ZERO|OF_ZERO|RES##BWL) -#define FLAGS_TEST(BWL) FLAGS_LOG(BWL)|ZF_IN_CR|SIGNED_IN_CR|SF_IN_CR - -/* How the flags are set after bt/btc/btr/bts. */ -#define FLAGS_BTEST CF_IN_CR|CF_ADDL|OF_ZERO|RESL - -/* How the flags are set after bsf/bsr. */ -#define FLAGS_BSRCH(WL) CF_ZERO|OF_ZERO|RES##WL|ZF_IN_CR - -/* How the flags are set after logical right shifts */ -#define FLAGS_SHR(BWL) (CF_EXPLICIT|OF_ARITH##BWL|RES##BWL) - -/* How the flags are set after double length shifts */ -#define FLAGS_DBLSH(WL) (CF_EXPLICIT|OF_ARITH##WL|RES##WL) - -/* How the flags are set after multiplies */ -#define FLAGS_MUL (CF_EXPLICIT|OF_EXPLICIT) - -#define SET_FLAGS(fl) lis flags,(fl)>>16 -#define ADD_FLAGS(fl) addis flags,flags,(fl)>>16 - -/* - * We are always off by one when compared with Intel's eip, this shortens - * code by allowing to load next byte with lbzu x,1(eip). The register - * called eip actually contains csbase+eip, and thus should be called lip - * for linear ip. 
- */ - -/* - * Reason codes passed to the C part of the emulator, this includes all - * instructions which may change the current code segment. These definitions - * will soon go into a separate include file. Codes 0 to 255 correspond - * directly to the interrupt/trap that has to be generated. - */ - -#define code_divide_err 0 -#define code_trap 1 -#define code_int3 3 -#define code_into 4 -#define code_bound 5 -#define code_ud 6 -#define code_dna 7 /* FPU not available */ - -#define code_iretw 256 /* Interrupt returns */ -#define code_iretl 257 -#define code_lcallw 258 /* Far calls and jumps */ -#define code_lcalll 259 -#define code_ljmpw 260 -#define code_ljmpl 261 -#define code_lretw 262 /* Far returns */ -#define code_lretl 263 -#define code_softint 264 /* int $xx */ -#define code_lock 265 /* Lock prefix */ -/* Codes 1024 to 2047 are used for I/O port access instructions: - - The three LSB define the port size (1, 2 or 4) - - bit of weight 512 means out if set, in if clear - - bit of weight 256 means ins/outs if set, in/out if clear - - bit of weight 128 means use 32 bit addresses if set, 16 bit if clear - (only used for ins/outs instructions, always clear for in/out) - */ -#define code_inb 1024+1 -#define code_inw 1024+2 -#define code_inl 1024+4 -#define code_outb 1024+512+1 -#define code_outw 1024+512+2 -#define code_outl 1024+512+4 -#define code_insb_a16 1024+256+1 -#define code_insw_a16 1024+256+2 -#define code_insl_a16 1024+256+4 -#define code_outsb_a16 1024+512+256+1 -#define code_outsw_a16 1024+512+256+2 -#define code_outsl_a16 1024+512+256+4 -#define code_insb_a32 1024+256+128+1 -#define code_insw_a32 1024+256+128+2 -#define code_insl_a32 1024+256+128+4 -#define code_outsb_a32 1024+512+256+128+1 -#define code_outsw_a32 1024+512+256+128+2 -#define code_outsl_a32 1024+512+256+128+4 - -#define state 31 -/* r31 (state) is a pointer to a structure describing the emulated x86 -processor, its layout is the following: - -first the general purpose registers, 
they are in little endian byte order - -offset name - - 0 eax/ax/al - 1 ah - 4 ecx/cx/cl - 5 ch - 8 edx/dx/dl - 9 dh - 12 ebx/bx/bl - 13 bh - 16 esp/sp - 20 ebp/bp - 24 esi/si - 28 edi/di -*/ - -#define AL 0 -#define AX 0 -#define EAX 0 -#define AH 1 -#define CL 4 -#define CX 4 -#define ECX 4 -#define DX 8 -#define EDX 8 -#define BX 12 -#define EBX 12 -#define SP 16 -#define ESP 16 -#define BP 20 -#define EBP 20 -#define SI 24 -#define ESI 24 -#define DI 28 -#define EDI 28 - -/* -than the rest of the machine state, big endian ! - -offset name - - 32 essel segment register selectors (values) - 36 cssel - 40 sssel - 44 dssel - 48 fssel - 52 gssel - 56 eipimg true eip (register named eip is csbase+eip) - 60 eflags eip and eflags only valid when C code running ! - 64 esbase segment registers bases - 68 csbase - 72 ssbase - 76 dsbase - 80 fsbase - 84 gsbase - 88 iobase For I/O instructions, I/O space virtual base - 92 ioperm I/O permission bitmap pointer - 96 reason Reason code when calling external emulator - 100 nexteip eip past instruction for external emulator - 104 parm1 parameter for external emulator - 108 parm2 parameter for external emulator - 112 _opcode current opcode register for external emulator - 116 _base segment register base for external emulator - 120 _offset intruction operand offset - More internal state was dumped here for debugging in first versions - - 128 vbase where the 1Mb memory is mapped - 132 cntimg instruction counter - 136 scratch - 192 eipstat array of 32k unsigned long pairs for eip stats -*/ - -#define essel 32 -#define cssel 36 -#define sssel 40 -#define dssel 44 -#define fssel 48 -#define gssel 52 -#define eipimg 56 -#define eflags 60 -#define esbase 64 -#define csbase 68 -#define ssbase 72 -#define dsbase 76 -#define fsbase 80 -#define gsbase 84 -#define iobase 88 -#define ioperm 92 -#define reason 96 -#define nexteip 100 -#define parm1 104 -#define parm2 108 -#define _opcode 112 -#define _base 116 -#define _offset 120 -#define 
vbase 128 -#define cntimg 132 -#ifdef EIP_STATS -#define eipstat 192 -#endif -/* Global registers */ - -/* Some segment register bases are permanently kept in registers since they -are often used: these are csb, esb and ssb because they are -required for jumps, string instructions, and pushes/pops/calls/rets. -dsbase is not kept in a register but loaded from memory to allow somewhat -more parallelism in the main emulation loop. -*/ - -#define one 30 /* Constant one, so pervasive */ -#define ssb 29 -#define csb 28 -#define esb 27 -#define eip 26 /* That one is indeed csbase+(e)ip-1 */ -#define result 25 /* For the use of result, op1, op2 */ -#define op1 24 /* see the section on flag emulation */ -#define op2 23 -#define opbase 22 /* default opcode table */ -#define flags 21 /* See earlier description */ -#define opcode 20 /* Opcode */ -#define opreg 19 /* Opcode extension/register number */ -/* base is reloaded with the base of the ds segment at the beginning of -every instruction, it is modified by segment override prefixes, when -the default base segment is ss, or when the modrm byte specifies a -register operand */ -#define base 18 /* Instruction's operand segment base */ -#define offset 17 /* Instruction's memory operand offset */ -/* used to address a table telling how to decode the addressing mode -specified by the modrm byte */ -#define adbase 16 /* addressing mode table */ -/* Following registers are used only as dedicated temporaries during decoding, -they are free for use during emulation */ -/* - * ceip (current eip) is only in use when we call the external emulator for - * instructions that fault. Note that it is forbidden to change flags before - * the check for the fault happens (divide by zero...) ! ceip is also used - * when measuring timing. - */ -#define ceip 15 - -/* A register used to measure timing information (when enabled) */ -#ifdef EIP_STATS -#define tstamp 14 -#endif - -#define count 12 /* Instruction counter. 
*/

/* Plain register numbers for the PPC registers used as temporaries. */
#define r0 0
#define r1 1 /* PPC Stack pointer. */
#define r3 3
#define r4 4
#define r5 5
#define r6 6
#define r7 7

/*
 * Macros to read code stream. The eip register is kept one byte behind the
 * next byte to fetch, so a byte is read with an update-form load at offset 1
 * and little endian words/dwords with a byte-reversed load indexed by the
 * constant register one (then eip is advanced by the operand size).
 */
#define NEXTBYTE(dest) lbzu dest,1(eip)
#define NEXTWORD(dest) lhbrx dest,eip,one; la eip,2(eip)
#define NEXTDWORD(dest) lwbrx dest,eip,one; la eip,4(eip)
/* NEXT fetches a new opcode byte; GOTNEXT dispatches one already in opcode. */
#define NEXT b nop
#define GOTNEXT b gotopcode

#ifdef __BOOT__
        START_GOT
        GOT_ENTRY(_jtables)
        GOT_ENTRY(jtab_www)
        GOT_ENTRY(adtable)
        END_GOT
#else
        .text
#endif
        .align 2
        .global em86_enter
        .type em86_enter,@function
/*
 * em86_enter: entry point of the emulator.
 * In:  r3 = pointer to the emulated processor state (copied into state).
 * Out: r3 = the non-zero value returned by em86_trap that ended the run
 *      (exit is only reached from the complex path below and does not
 *      modify r3).
 * A 96 byte frame is allocated: r14-r31 are saved at 24(r1), cr at 20(r1)
 * and lr at 100(r1), i.e. in the caller's frame. exit restores all of them.
 */
em86_enter:     stwu r1,-96(r1)                 # allocate stack
        mflr r0
        stmw 14,24(r1)                          # save r14-r31
        mfcr r4
        stw r0,100(r1)                          # save lr
        mr state,r3
        stw r4,20(r1)                           # save cr
#ifdef __BOOT__
/* We need this since r30 is the default GOT pointer */
#define r30 30
        GET_GOT
/* The relocation of these tables is explicit, this could be done
 * automatically with fixups but would add more than 8kb in the fixup tables.
 */
        lwz r3,GOT(_jtables)
/*
 * r4 = run-time address of _jtables minus the word stored at _endjtables
 * (presumably the link-time address of _jtables - confirm against the table
 * definition). When the difference is zero no relocation is needed.
 */
        lwz r4,_endjtables-_jtables(r3)
        sub. r4,r3,r4
        beq+ 1f
        li r0,((_endjtables-_jtables)>>2)+1
        addi r3,r3,-4
        mtctr r0
/* Add the difference to every word from _jtables up to _endjtables included. */
0:      lwzu r5,4(r3)
        add r5,r5,r4
        stw r5,0(r3)
        bdnz 0b
1:      lwz adbase,GOT(adtable)
        lwz opbase,GOT(jtab_www)
/* Now r30 is only used as constant 1 */
#undef r30
        li one,1                                # pervasive constant
#else
        lis opbase,jtab_www@ha
        lis adbase,adtable@ha
        li one,1                                # pervasive constant
        addi opbase,opbase,jtab_www@l
        addi adbase,adbase,adtable@l
#ifdef EIP_STATS
        li ceip,0
        mftb tstamp
#endif
#endif
/* We branch back here when calling an external function tells us to resume */
/*
 * restart: rebuild the register/cr representation of the flags from the
 * eflags image in the state structure, reload the cached segment bases,
 * fetch the first opcode byte and enter the main loop.
 * flags is initialised so that OF and CF are explicit values and SF/ZF are
 * taken from cr6 (ZF protected); result/op1/op2 are set up so that the lazy
 * PF and AF evaluation gives back the values found in the image.
 */
restart:        lwz r3,eflags(state)
        lis flags,(OF_EXPLICIT|ZF_IN_CR|ZF_PROTECT|SF_IN_CR)>>16
        lwz csb,csbase(state)
        extsb result,r3                         # SF/PF
        rlwinm op1,r3,31,0x08                   # AF
        lwz eip,eipimg(state)
        ZF862ZF(r3)                             # cr6
        addi op2,op1,0                          # AF
        lwz ssb,ssbase(state)
        rlwimi flags,r3,15,OF_VALUE             # OF
        rlwimi r3,r3,32+RF86-RF,RF,RF           # RF
        lwz esb,esbase(state)
        ori result,result,0xfb                  # PF
        mtcrf 0x06,r3                           # RF/DF/IF/TF/SF/ZF
        lbzux opcode,eip,csb                    # first opcode, eip becomes linear
        rlwimi flags,r3,27,CF_VALUE             # CF
        xori result,result,0xff                 # PF
        lwz count,cntimg(state)
        GOTNEXT                                 # start the emulator

/* Now return */
/* exit: undo the prologue of em86_enter (lr, cr, r14-r31, frame) and return. */
exit:   lwz r0,100(r1)
        lwz r4,20(r1)
        mtlr r0
        lmw 14,24(r1)
        mtcr r4
        addi r1,r1,96
        blr

/*
 * trap: reached from gotopcode when TF is set. RF is copied to cr bit 0 and
 * cleared; if it was set the current instruction is resumed without
 * trapping, otherwise a code_trap event is raised through complex.
 */
trap:   crmove 0,RF
        crclr RF
        bt- 0,resume
        sub ceip,eip,csb
        li r3,code_trap
/*
 * complex: hand an event over to the C part of the emulator (em86_trap).
 * In:  r3 = reason code, r4/r5 = values stored in parm1/parm2,
 *      ceip = value stored in eipimg.
 * The eip register is converted back to an offset past the current byte
 * (add 1, subtract csb) and stored in nexteip. op1, op2, result and flags
 * are also dumped at raw offsets 240-252 of the state structure (no symbolic
 * names for these offsets are visible in this part of the file).
 * _eval_flags is defined elsewhere; its r3 result is stored as the eflags
 * image - presumably the fully evaluated x86 eflags, confirm there.
 * em86_trap is called with r3 = state; a zero return restarts the emulator,
 * anything else leaves it through exit with that value still in r3.
 */
complex:        addi eip,eip,1
        stw r3,reason(state)
        sub eip,eip,csb
        stw op1,240(state)
        stw op2,244(state)
        stw result,248(state)
        stw flags,252(state)
        stw r4,parm1(state)
        stw r5,parm2(state)
        stw opcode,_opcode(state)
        bl _eval_flags
        stw base,_base(state)
        stw eip,nexteip(state)
        stw r3,eflags(state)
        mr r3,state
        stw offset,_offset(state)
        stw ceip,eipimg(state)
        stw count,cntimg(state)
        bl em86_trap
        cmpwi r3,0
        bne exit
        b restart

/* Main loop */
/*
 * The two LSB of each entry in the main table mean the following:
 * 00: indirect opcode: modrm follows and the three middle bits are an
 *     opcode extension.
The entry points to another jump table.
 * 01: direct instruction, branch directly to the routine.
 * 10: modrm specifies byte size memory and register operands.
 * 11: modrm specifies word/long memory and register operands.
 *
 * The modrm byte, if present, is always loaded in r7.
 *
 * Note: most "mr x,y" instructions have been replaced by "addi x,y,0" since
 * the latter can be executed in the second integer unit on 603e.
 */

/*
 * This code is very good example of absolutely unmaintainable code.
 * It was actually much easier to write than it is to understand !
 * If my computations are right, the maximum path length from fetching
 * the opcode to exiting to the actual instruction execution is
 * 46 instructions (for non-prefixed, single byte opcode instructions).
 *
 */
/*
 * Summary of what the decoder below leaves for the instruction routine
 * reached through ctr:
 *   opcode  last opcode byte plus prefix bits
 *   opreg   offset in the state structure of the register selected by the
 *           middle three bits of modrm (or the raw extension * 4)
 *   base    segment base of the memory operand, or state itself when the
 *           operand is a register
 *   offset  offset of the memory operand inside the segment, or offset of
 *           the register inside the state structure
 * r3 holds the byte index into the opcode table, r4 the table entry and
 * later the addressing mode entry, r7 the modrm (then sib) byte.
 * cr0 holds the class of the table entry (then the modrm mod field), cr1
 * the register/memory test (then the addressing mode class) and cr2 the
 * "keep the current base" test (gt) against "use ss" (not gt).
 */
        .align 5
#ifdef EIP_STATS
/* Profiling variant: updates the per-eip count/time pairs in eipstat. */
nop:            NEXTBYTE(opcode)
gotopcode:      slwi r3,opcode,2
        bt- TF,trap
resume:         lwzx r4,opbase,r3
        addi r5,state,eipstat+4
        clrlslwi r6,ceip,17,3
        mtctr r4
        lwzux r7,r5,r6
        slwi. r0,r4,30                          # two lsb of table entry
        sub r7,r7,tstamp
        lwz r6,-4(r5)
        mftb tstamp
        addi r6,r6,1
        sub ceip,eip,csb
        stw r6,-4(r5)
        add r7,r7,tstamp
        lwz base,dsbase(state)
        stw r7,0(r5)
#else
nop:            NEXTBYTE(opcode)
gotopcode:      slwi r3,opcode,2
        bt- TF,trap
resume:         lwzx r4,opbase,r3
        sub ceip,eip,csb
        mtctr r4
        slwi. r0,r4,30                          # two lsb of table entry
        lwz base,dsbase(state)
        addi count,count,1
#endif
        bgtctr-                                 # for instructions without modrm

/* modrm byte present */
        NEXTBYTE(r7)                            # modrm byte
        cmplwi cr1,r7,192                       # mod == 3: register operand
        rlwinm opreg,r7,31,0x1c
        beq- cr0,8f                             # extended opcode
/* modrm with middle 3 bits specifying a register (non prefixed) */
/* r4 becomes 0x1c0d for byte operands and 0x1c for word/long ones; it masks
 * the register field into an offset inside the state structure. */
        rlwinm r0,r4,3,0x8
        li r4,0x1c0d
        rlwimi opreg,r7,27,0x01
        srw r4,r4,r0
        and opreg,opreg,r4
        blt cr1,9f
/* modrm with 2 register operands */
1:      rlwinm offset,r7,2,0x1c
        addi base,state,0
        rlwimi offset,r7,30,0x01
        and offset,offset,r4
        bctr

/* Prefixes: first segment overrides */
/* Each one fetches the next byte in r7, selects the new base and flags the
 * override in the sign bit of opcode before joining the common code at 2. */
        .align 4
_es:    NEXTBYTE(r7); addi base,esb,0
        oris opcode,opcode,0x8000; b 2f
_cs:    NEXTBYTE(r7); addi base,csb,0
        oris opcode,opcode,0x8000; b 2f
_fs:    NEXTBYTE(r7); lwz base,fsbase(state)
        oris opcode,opcode,0x8000; b 2f
_gs:    NEXTBYTE(r7); lwz base,gsbase(state)
        oris opcode,opcode,0x8000; b 2f
_ss:    NEXTBYTE(r7); addi base,ssb,0
        oris opcode,opcode,0x8000; b 2f
_ds:    NEXTBYTE(r7)
        oris opcode,opcode,0x8000; b 2f

/* Lock (unimplemented) and repeat prefixes */
_lock:  li r3,code_lock; b complex
_repnz: NEXTBYTE(r7); rlwimi opcode,one,12,0x1800; b 2f
_repz:  NEXTBYTE(r7); rlwimi opcode,one,11,0x1800; b 2f

/* Operand and address size prefixes */
/* The size bits are part of the table index, so r3 is rebuilt from opcode. */
        .align 4
_opsize:        NEXTBYTE(r7); ori opcode,opcode,0x200
        rlwinm r3,opcode,2,0x1ffc; b 2f
_adsize:        NEXTBYTE(r7); ori opcode,opcode,0x400
        rlwinm r3,opcode,2,0x1ffc; b 2f

_twobytes:      NEXTBYTE(r7); addi r3,r3,0x400
/* Common tail of all prefixes: r7 is the next opcode byte, it replaces the
 * low byte of opcode and of the table index, then dispatch as above. */
2:      rlwimi r3,r7,2,0x3fc
        lwzx r4,opbase,r3
        rlwimi opcode,r7,0,0xff
        mtctr r4
        slwi. r0,r4,30
        bgtctr-                                 # direct instruction
/* modrm byte in a prefixed instruction */
        NEXTBYTE(r7)                            # modrm byte
        cmplwi cr1,r7,192                       # same test as non prefixed path
        rlwinm opreg,r7,31,0x1c
        beq- 6f
/* modrm with middle 3 bits specifying a register (prefixed) */
        rlwinm r0,r4,3,0x8
        li r4,0x1c0d
        rlwimi opreg,r7,27,0x01
        srw r4,r4,r0
        and opreg,opreg,r4
        bnl cr1,1b                              # 2 register operands
/* modrm specifying memory with prefix */
3:      rlwinm r3,r3,27,0xff80
        rlwimi adbase,r7,2,0x1c
        extsh r3,r3
        rlwimi r3,r7,31,0x60
        lwzx r4,r3,adbase
/* The compare must be unsigned: entries which default to ss have bit 31 set
 * and would otherwise be taken for displacement-only modes. */
        cmplwi cr1,r4,0x3090
        bnl+ cr1,10f
/* displacement only addressing modes */
4:      cmpwi r4,0x2000
        bne 5f
        NEXTWORD(offset)
        bctr
5:      NEXTDWORD(offset)
        bctr
/* modrm with opcode extension (prefixed) */
6:      lwzx r4,r4,opreg
        mtctr r4
        blt cr1,3b
/* modrm with opcode extension and register operand */
7:      rlwinm offset,r7,2,0x1c
        addi base,state,0
        rlwinm r0,r4,3,0x8
        li r4,0x1c0d
        rlwimi offset,r7,30,0x01
        srw r4,r4,r0
        and offset,offset,r4
        bctr
/* modrm with opcode extension (non prefixed) */
8:      lwzx r4,r4,opreg
        mtctr r4
/* FIXME ? We continue fetching even if the opcode extension is undefined.
 * It shouldn't do any harm on real mode emulation anyway, and for ROM
 * BIOS emulation, we are supposed to read valid code.
 */
        bnl cr1,7b
/* modrm specifying memory without prefix */
9:      rlwimi adbase,r7,2,0x1c                 # memory addressing mode computation
        rlwinm r3,r7,31,0x60
        lwzx r4,r3,adbase
        cmplwi cr1,r4,0x3090
        blt- cr1,4b                             # displacement only addressing mode
10:     rlwinm. r0,r7,24,0,1                    # three cases distinguished
        beq- cr1,15f                            # an sib follows
/* r3 is the kludge field divided by 4: 0 (16 bit, no index), 4 (32 bit),
 * 0x18 (%si index) or 0x1c (%di index). The last two are the offsets of si
 * and di in the state structure. Comparing with 4 gives lt for 16 bit modes
 * without index, eq for 32 bit modes and gt for 16 bit indexed modes. */
        rlwinm r3,r4,30,0x1c                    # 16bit/32bit/%si index/%di index
        cmpwi cr1,r3,4                          # set cr1 as early as possible
        rlwinm r6,r4,26,0x1c                    # base register
        lwbrx offset,state,r6                   # load the base register
        beq cr0,14f                             # no displacement
        cmpw cr2,r4,opcode                      # check for ss as default base
        bgt cr0,12f                             # byte offset
        beq cr1,11f                             # 32 bit displacement
        NEXTWORD(r5)                            # 16 bit displacement
        bgt cr1,13f                             # d16(base,index)
/* d16(base) */
        add offset,offset,r5
        clrlwi offset,offset,16
        bgtctr cr2
        addi base,ssb,0
        bctr
/* d32(base) */
11:     NEXTDWORD(r5)
        add offset,offset,r5
        bgtctr cr2
        addi base,ssb,0
        bctr
/* 8 bit displacement */
12:     NEXTBYTE(r5)
        extsb r5,r5
        bgt cr1,13f
/* d8(base) */
/* r6 is 0xffff for a 16 bit entry and all ones for a 32 bit one (bit 0x80
 * of the entry sign extended), so the sum wraps at the right size. */
        extsb r6,r4
        add offset,offset,r5
        ori r6,r6,0xffff
        and offset,offset,r6
        bgtctr cr2
        addi base,ssb,0
        bctr
/* d8(base,index) and d16(base,index) share this code ! */
13:     lhbrx r3,state,r3
        add offset,offset,r5
        add offset,offset,r3
        clrlwi offset,offset,16
        bgtctr cr2
        addi base,ssb,0
        bctr
/* no displacement: only indexed modes may use ss as default base */
14:     beqctr cr1                              # 32 bit register indirect
        clrlwi offset,offset,16
        bltctr cr1                              # 16 bit register indirect
/* (base,index) */
        lhbrx r3,state,r3                       # 16 bit [{bp,bx}+{si,di}]
        cmpw cr2,r4,opcode                      # check for ss as default base
        add offset,offset,r3
        clrlwi offset,offset,16                 # wrap the sum to 16 bits
        bgtctr+ cr2
        addi base,ssb,0
        bctr
/* sib modes, note that the size of the offset can be known from cr0 */
15:     NEXTBYTE(r7)                            # get sib
        rlwinm r3,r7,31,0x1c                    # index
        rlwinm offset,r7,2,0x1c                 # base
        cmpwi cr1,r3,ESP                        # has index ?
        bne cr0,18f                             # base+d8/d32
        cmpwi offset,EBP
        beq 17f                                 # d32(,index,scale)
/* 0xcc00 rotated by the base register offset has its two top bits set only
 * for esp (16) and ebp (20), the bases which default to ss. */
        xori r4,one,0xcc01                      # build 0x0000cc00
        rlwnm r4,r4,offset,0,1                  # 0 or 0xc0000000
        lwbrx offset,state,offset
        cmpw cr2,r4,opcode                      # use ss ?
        beq- cr1,16f                            # no index
/* (base,index,scale) */
        lwbrx r3,state,r3
        srwi r6,r7,6
        slw r3,r3,r6
        add offset,offset,r3
        bgtctr cr2
        addi base,ssb,0
        bctr
/* (base), in practice only (%esp) is coded this way */
16:     bgtctr cr2
        addi base,ssb,0
        bctr
/* d32(,index,scale) */
17:     NEXTDWORD(offset)
        beqctr- cr1                             # no index: very unlikely
        lwbrx r3,state,r3
        srwi r6,r7,6
        slw r3,r3,r6
        add offset,offset,r3
        bctr
/* 8 or 32 bit displacement */
18:     xori r4,one,0xcc01                      # build 0x0000cc00
        rlwnm r4,r4,offset,0,1                  # 0 or 0xc0000000
        lwbrx offset,state,offset
        cmpw cr2,r4,opcode                      # use ss ?
        bgt cr0,20f                             # 8 bit offset
/* 32 bit displacement */
        NEXTDWORD(r5)
        beq- cr1,21f
/* d(base,index,scale) */
19:     lwbrx r3,state,r3
        add offset,offset,r5
        add offset,offset,r3
        bgtctr cr2
        addi base,ssb,0
        bctr
/* 8 bit displacement */
20:     NEXTBYTE(r5)
        extsb r5,r5
        bne+ cr1,19b
/* d(base), in practice base is %esp */
21:     add offset,offset,r5
        bgtctr- cr2
        addi base,ssb,0
        bctr

/*
 * Flag evaluation subroutines: they have not been written for performance
 * since they are not often used in practice. The rule of the game was to
 * write them with as few branches as possible.
 * The first routines evaluate either one or 2 (ZF and SF simultaneously)
 * flags and do not use r0 and r7.
 * The more complex routines (_eval_above, _eval_signed and _eval_flags)
 * call the former ones, using r0 as a return address save register and
 * r7 as a safe temporary.
 */

/*
 * _eval_sf_zf evaluates simultaneously SF and ZF unless ZF is already valid
 * and protected because it is possible, although it is exceptional, to have
 * SF and ZF set at the same time after a few instructions which may leave the
 * flags in this apparently inconsistent state: sahf, popf, iret and the few
 * (for now unimplemented) instructions which only affect ZF (lar, lsl, arpl,
 * cmpxchg8b). This also solves the obscure case of ZF set and PF clear.
- * On return: SF=cr6[0], ZF=cr6[2]. - */ - -_eval_sf_zf: andis. r5,flags,ZF_PROTECT>>16 - rlwinm r3,flags,0,INCDEC_FIELD - RES_SHIFT(r4) - cntlzw r3,r3 - slw r4,result,r4 - srwi r5,r3,5 # ? use result : use op1 - rlwinm r3,r3,2,0x18 - oris flags,flags,(SF_IN_CR|SIGNED_IN_CR|ZF_IN_CR)>>16 - neg r5,r5 # mux result/op2 - slw r3,op2,r3 - and r4,r4,r5 - andc r3,r3,r5 - xoris flags,flags,(SIGNED_IN_CR)>>16 - bne- 1f # 12 instructions between set - or r3,r3,r4 # and test, good for folding - cmpwi cr6,r3,0 - blr -1: or. r3,r3,r4 - crmove SF,0 - blr - -/* - * _eval_cf may be called at any time, no other flag is affected. - * On return: CF=cr4[0], r3= CF ? 0x100:0 = CF<<8. - */ -_eval_cf: addc r3,flags,flags # CF_IN to xer[ca] - RES2CF(r4) # get 8 or 16 bit carry - subfe r3,result,op1 # generate PPC carry for - CF_ROTCNT(r5) # preceding operation - addze r3,r4 # put carry into LSB - CF_POL(r4,23) # polarity & 0x100 - oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16 - rlwnm r3,r3,r5,23,23 # shift carry there - xor r3,r3,r4 # CF <<8 - xoris flags,flags,(ABOVE_IN_CR)>>16 - cmplw cr4,one,r3 # sets cr4[0] - blr - -/* - * eval_of returns the overflow flag in OF_STATE field, which will be - * either 001 (OF clear) or 101 (OF set), is is only called when the two - * low order bits of OF_STATE are not 01 (otherwise it will work but - * it is an elaborate variant of a nop with a few registers destroyed) - * The code multiplexes several sources in a branchless way, was fun to write. 
- */ -_eval_of: GET_ADDSUB(r4) # 0(add)/1(sub) - rlwinm r3,flags,0,INCDEC_FIELD - neg r4,r4 # 0(add)/-1(sub) - eqv r5,result,op1 # result[]==op1[] (bit by bit) - cntlzw r3,r3 # inc/dec - xor r4,r4,op2 # true sign of op2 - oris r5,r5,0x0808 # bits to clear - clrlwi r6,r3,31 # 0(inc)/1(dec) - eqv r4,r4,op1 # op1[]==op2[] (bit by bit) - add r6,op2,r6 # add 1 if dec - rlwinm r3,r3,2,0x18 # incdec_shift - andc r4,r4,r5 # arithmetic overflow - slw r3,r6,r3 # shifted inc/dec result - addis r3,r3,0x8000 # compare with 0x80000000 - ori r4,r4,0x0808 # bits to set - cntlzw r3,r3 # 32 if inc/dec overflow - OF_ROTCNT(r6) - rlwimi r4,r3,18,0x00800000 # insert inc/dec overflow - rlwimi flags,one,24,OF_STATE_MASK - rlwnm r3,r4,r6,8,8 # get field - rlwimi flags,r3,3,OF_VALUE # insert OF - blr - -/* - * _eval_pf will always be called when needed (complex but infrequent), - * there are a few quirks for a branchless solution. - * On return: PF=cr0[0], PF=MSB(r3) - */ -_eval_pf: rlwinm r3,flags,0,INCDEC_FIELD - rotrwi r4,op2,4 # from inc/dec - rotrwi r5,result,4 # from result - cntlzw r3,r3 # use result if 32 - xor r4,r4,op2 - xor r5,r5,result - rlwinm r3,r3,26,0,0 # 32 becomes 0x80000000 - clrlwi r4,r4,28 - lis r6,0x9669 # constant to shift - clrlwi r5,r5,28 - rlwnm r4,r6,r4,0,0 # parity from inc/dec - rlwnm r5,r6,r5,0,0 # parity from result - andc r4,r4,r3 # select which one - and r5,r5,r3 - add. r3,r4,r5 # and test to simplify - blr # returns in r3 and cr0 set. - -/* - * _eval_af will always be called when needed (complex but infrequent): - * - if after inc, af is set when 4 low order bits of op1 are 0 - * - if after dec, af is set when 4 low order bits of op1 are 1 - * (or 0 after adding 1 as implemented here) - * - if after add/sub/adc/sbb/cmp af is set from sum of 4 LSB of op1 - * and 4 LSB of op2 (eventually complemented) plus carry in. - * - other instructions leave AF undefined so the returned value is irrelevant. 
- * Returned value must be masked with 0x10, since all other bits are undefined. - * There branchless code is perhaps not the most efficient, but quite parallel. - */ -_eval_af: rlwinm r3,flags,0,INCDEC_FIELD - clrlwi r5,op2,28 # 4 LSB of op2 - addc r4,flags,flags # carry_in - GET_ADDSUB(r6) - cntlzw r3,r3 # if inc/dec 16..23 else 32 - neg r6,r6 # add/sub - clrlwi r4,r3,31 # if dec 1 else 0 - xor r5,r5,r6 # conditionally complement - clrlwi r6,op1,28 # 4 LSB of op1 - add r4,op2,r4 # op2+(dec ? 1 : 0) - clrlwi r4,r4,28 # 4 LSB of op2+(dec ? 1 : 0) - adde r5,r6,r5 # op1+cy_in+(op2/~op2) - cntlzw r4,r4 # 28..31 if not AF, 32 if set - andc r5,r5,r3 # masked AF from add/sub... - andc r4,r3,r4 # masked AF from inc/dec - or r3,r4,r5 - blr - -/* - * _eval_above will only be called if ABOVE_IN_CR is not set. - * On return: ZF=cr6[2], CF=cr4[0], ABOVE=cr4[1] - */ -_eval_above: andis. r3,flags,ZF_IN_CR>>16 - mflr r0 - beql+ _eval_sf_zf - andis. r3,flags,CF_IN_CR>>16 - beql+ _eval_cf - mtlr r0 - oris flags,flags,ABOVE_IN_CR>>16 - crnor ABOVE,ZF,CF - blr - -/* _eval_signed may only be called when signed_in_cr is clear ! */ -_eval_signed: andis. r3,flags,SF_IN_CR>>16 - mflr r0 - beql+ _eval_sf_zf -/* SF_IN_CR and ZF_IN_CR are set, SIGNED_IN_CR is clear */ - rlwinm. r3,flags,5,0,1 - xoris flags,flags,(SIGNED_IN_CR|SF_IN_CR)>>16 - bngl+ _eval_of - andis. r3,flags,OF_VALUE>>16 - mtlr r0 - crxor SLT,SF,OF - crnor SGT,SLT,ZF - blr - -_eval_flags: mflr r0 - bl _eval_cf - li r7,2 - rlwimi r7,r3,24,CF86,CF86 # 2 if CF clear, 3 if set - bl _eval_pf - andis. r4,flags,SF_IN_CR>>16 - rlwimi r7,r3,32+PF-PF86,PF86,PF86 - bl _eval_af - rlwimi r7,r3,0,AF86,AF86 - beql+ _eval_sf_zf - mfcr r3 - rlwinm. r4,flags,5,0,1 - rlwimi r7,r3,0,DF86,SF86 - ZF2ZF86(r3,r7) - bngl+ _eval_of - mtlr r0 - lis r4,0x0004 - lwz r3,eflags(state) - addi r4,r4,0x7000 - rlwimi r7,flags,17,OF86,OF86 - and r3,r3,r4 - or r3,r3,r7 - blr - -/* Quite simple for real mode, input in r4, returns in r3. 
*/ -_segment_load: lwz r5,vbase(state) - rlwinm r3,r4,4,0xffff0 # segment selector * 16 - add r3,r3,r5 - blr - -/* To allow I/O port virtualization if necessary, code for exception in r3, -port number in r4 */ -_check_port: lwz r5,ioperm(state) - rlwinm r6,r4,29,0x1fff # 0 to 8kB - lis r0,0xffff - lhbrx r5,r5,r6 - clrlwi r6,r4,29 # modulo 8 - rlwnm r0,r0,r3,0x0f # 1, 3, or 0xf - slw r0,r0,r6 - and. r0,r0,r5 - bne- complex - blr -/* - * Instructions are in approximate functional order: - * 1) move, exchange, lea, push/pop, pusha/popa - * 2) cbw/cwde/cwd/cdq, zero/sign extending moves, in/out - * 3) arithmetic: add/sub/adc/sbb/cmp/inc/dec/neg - * 4) logical: and/or/xor/test/not/bt/btc/btr/bts/bsf/bsr - * 5) jump, call, ret - * 6) string instructions and xlat - * 7) rotate/shift/mul/div - * 8) segment register, far jumps, calls and rets, interrupts - * 9) miscellenaous (flags, bcd,...) - */ - -#define MEM offset,base -#define REG opreg,state -#define SELECTORS 32 -#define SELBASES 64 - -/* Immediate moves */ -movb_imm_reg: rlwinm opreg,opcode,2,28,29; lbz r3,1(eip) - rlwimi opreg,opcode,30,31,31; lbzu opcode,2(eip) - stbx r3,REG; GOTNEXT - -movw_imm_reg: lhz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,3(eip) - sthx r3,REG; GOTNEXT - -movl_imm_reg: lwz r3,1(eip); clrlslwi opreg,opcode,29,2; lbzu opcode,5(eip) - stwx r3,REG; GOTNEXT - -movb_imm_mem: lbz r0,1(eip); cmpwi opreg,0 - lbzu opcode,2(eip); bne- ud - stbx r0,MEM; GOTNEXT - -movw_imm_mem: lhz r0,1(eip); cmpwi opreg,0 - lbzu opcode,3(eip); bne- ud - sthx r0,MEM; GOTNEXT - -movl_imm_mem: lwz r0,1(eip); cmpwi opreg,0 - lbzu opcode,5(eip); bne- ud - stwx r0,MEM; GOTNEXT - -/* The special short form moves between memory and al/ax/eax */ -movb_al_a32: lwbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,5(eip) - stbx r0,MEM; GOTNEXT - -movb_al_a16: lhbrx offset,eip,one; lbz r0,AL(state); lbzu opcode,3(eip) - stbx r0,MEM; GOTNEXT - -movw_ax_a32: lwbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,5(eip) - sthx 
r0,MEM; GOTNEXT - -movw_ax_a16: lhbrx offset,eip,one; lhz r0,AX(state); lbzu opcode,3(eip) - sthx r0,MEM; GOTNEXT - -movl_eax_a32: lwbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,5(eip) - stwx r0,MEM; GOTNEXT - -movl_eax_a16: lhbrx offset,eip,one; lwz r0,EAX(state); lbzu opcode,3(eip) - stwx r0,MEM; GOTNEXT - -movb_a32_al: lwbrx offset,eip,one; lbzu opcode,5(eip); lbzx r0,MEM - stb r0,AL(state); GOTNEXT - -movb_a16_al: lhbrx offset,eip,one; lbzu opcode,3(eip); lbzx r0,MEM - stb r0,AL(state); GOTNEXT - -movw_a32_ax: lwbrx offset,eip,one; lbzu opcode,5(eip); lhzx r0,MEM - sth r0,AX(state); GOTNEXT - -movw_a16_ax: lhbrx offset,eip,one; lbzu opcode,3(eip); lhzx r0,MEM - sth r0,AX(state); GOTNEXT - -movl_a32_eax: lwbrx offset,eip,one; lbzu opcode,5(eip); lwzx r0,MEM - stw r0,EAX(state); GOTNEXT - -movl_a16_eax: lhbrx offset,eip,one; lbzu opcode,3(eip); lwzx r0,MEM - stw r0,EAX(state); GOTNEXT - -/* General purpose move (all are exactly 4 instructions long) */ - .align 4 -movb_reg_mem: lbzx r0,REG - NEXTBYTE(opcode) - stbx r0,MEM - GOTNEXT - -movw_reg_mem: lhzx r0,REG - NEXTBYTE(opcode) - sthx r0,MEM - GOTNEXT - -movl_reg_mem: lwzx r0,REG - NEXTBYTE(opcode) - stwx r0,MEM - GOTNEXT - -movb_mem_reg: lbzx r0,MEM - NEXTBYTE(opcode) - stbx r0,REG - GOTNEXT - -movw_mem_reg: lhzx r0,MEM - NEXTBYTE(opcode) - sthx r0,REG - GOTNEXT - -movl_mem_reg: lwzx r0,MEM - NEXTBYTE(opcode) - stwx r0,REG - GOTNEXT - -/* short form exchange ax/eax with register */ -xchgw_ax_reg: clrlslwi opreg,opcode,29,2 - lhz r3,AX(state) - lhzx r4,REG - sthx r3,REG - sth r4,AX(state) - NEXT - -xchgl_eax_reg: clrlslwi opreg,opcode,29,2 - lwz r3,EAX(state) - lwzx r4,REG - stwx r3,REG - stw r4,EAX(state) - NEXT - -/* General exchange (unlocked!) 
*/
xchgb_reg_mem:  lbzx    r3,MEM
                lbzx    r4,REG
                NEXTBYTE(opcode)
                stbx    r3,REG
                stbx    r4,MEM
                GOTNEXT

xchgw_reg_mem:  lhzx    r3,MEM
                lhzx    r4,REG
                sthx    r3,REG
                sthx    r4,MEM
                NEXT

xchgl_reg_mem:  lwzx    r3,MEM
                lwzx    r4,REG
                stwx    r3,REG
                stwx    r4,MEM
                NEXT

/* lea, one of the simplest instructions */
leaw:           cmpw    base,state
                beq-    ud
                sthbrx  offset,REG
                NEXT

leal:           cmpw    base,state
                beq-    ud
                stwbrx  offset,REG
                NEXT

/* Short form pushes and pops */
pushw_sp_reg:   li      r3,SP
                lhbrx   r4,state,r3
                clrlslwi opreg,opcode,29,2
                lhzx    r0,REG
                addi    r4,r4,-2
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                sthx    r0,ssb,r4
                NEXT

pushl_sp_reg:   li      r3,SP
                lhbrx   r4,state,r3
                clrlslwi opreg,opcode,29,2
                lwzx    r0,REG
                addi    r4,r4,-4
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                stwx    r0,ssb,r4
                NEXT

popw_sp_reg:    li      r3,SP
                lhbrx   r4,state,r3
                clrlslwi opreg,opcode,29,2
                lhzx    r0,ssb,r4
                addi    r4,r4,2         # order is important in case of pop sp
                sthbrx  r4,state,r3
                sthx    r0,REG
                NEXT

popl_sp_reg:    li      r3,SP
                lhbrx   r4,state,r3
                clrlslwi opreg,opcode,29,2
                lwzx    r0,ssb,r4
                addi    r4,r4,4
                sthbrx  r4,state,r3
                stwx    r0,REG
                NEXT

/* Push immediate: 16/32 bit immediates are copied byte for byte, so they
 * stay in x86 (little endian) order without any swap. */
pushw_sp_imm:   li      r3,SP
                lhbrx   r4,state,r3
                lhz     r0,1(eip)
                addi    r4,r4,-2
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                lbzu    opcode,3(eip)
                sthx    r0,ssb,r4
                GOTNEXT

pushl_sp_imm:   li      r3,SP
                lhbrx   r4,state,r3
                lwz     r0,1(eip)
                addi    r4,r4,-4
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                lbzu    opcode,5(eip)
                stwx    r0,ssb,r4
                GOTNEXT

/* Push sign extended 8 bit immediate. Only the single byte at 1(eip) is the
 * immediate (loading a halfword would sign extend the following opcode byte
 * instead), and the extended value is a native PPC integer, so it has to be
 * stored byte reversed to land on the x86 stack in little endian order. */
pushw_sp_imm8:  li      r3,SP
                lhbrx   r4,state,r3
                lbz     r0,1(eip)       # the imm8 itself
                addi    r4,r4,-2
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                lbzu    opcode,2(eip)
                extsb   r0,r0
                sthbrx  r0,ssb,r4
                GOTNEXT

pushl_sp_imm8:  li      r3,SP
                lhbrx   r4,state,r3
                lbz     r0,1(eip)       # the imm8 itself
                addi    r4,r4,-4
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                lbzu    opcode,2(eip)
                extsb   r0,r0
                stwbrx  r0,ssb,r4
                GOTNEXT

/* General push/pop */
pushw_sp:       lhbrx   r0,MEM
                li      r3,SP
                lhbrx   r4,state,r3
                addi    r4,r4,-2
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                sthbrx
r0,r4,ssb
                NEXT

pushl_sp:       lwbrx   r0,MEM
                li      r3,SP
                lhbrx   r4,state,r3
                addi    r4,r4,-4
                sthbrx  r4,state,r3
                clrlwi  r4,r4,16
                stwbrx  r0,r4,ssb
                NEXT

/* pop is an exception with 32 bit addressing modes, it is possible
to calculate wrongly the address when esp is used as base. But 16 bit
addressing modes are safe */

/* The reg field of the modrm byte must be 0 for pop r/m: compare opreg with
 * the immediate 0 (cmpwi), not with whatever r0 happens to contain. */
popw_sp_a16:    cmpwi   cr1,opreg,0     # first check the opcode
                li      r3,SP
                lhbrx   r4,state,r3
                bne-    cr1,ud
                lhzx    r0,ssb,r4
                addi    r4,r4,2
                sthx    r0,MEM
                sthbrx  r4,state,r3
                NEXT

popl_sp_a16:    cmpwi   cr1,opreg,0
                li      r3,SP
                lhbrx   r4,state,r3
                bne-    cr1,ud
                lwzx    r0,ssb,r4
                addi    r4,r4,4         # a dword was popped
                stwx    r0,MEM
                sthbrx  r4,state,r3
                NEXT

/* 32 bit addressing modes for pop not implemented for now. */
                .equ    popw_sp_a32,unimpl
                .equ    popl_sp_a32,unimpl

/* pusha/popa */
pushaw_sp:      li      r3,SP
                li      r0,8
                lhbrx   r4,r3,state
                mtctr   r0
                addi    r5,state,-4
1:              addi    r4,r4,-2
                lhzu    r6,4(r5)
                clrlwi  r4,r4,16
                sthx    r6,ssb,r4
                bdnz    1b
                sthbrx  r4,r3,state     # new sp
                NEXT

pushal_sp:      li      r3,SP
                li      r0,8
                lhbrx   r4,r3,state
                mtctr   r0
                addi    r5,state,-4
1:              addi    r4,r4,-4
                lwzu    r6,4(r5)
                clrlwi  r4,r4,16
                stwx    r6,ssb,r4
                bdnz    1b
                sthbrx  r4,r3,state     # new sp
                NEXT

popaw_sp:       li      r3,SP
                li      r0,8
                lhbrx   r4,state,r3
                mtctr   r0
                addi    r5,state,32
1:              lhzx    r6,ssb,r4
                addi    r4,r4,2
                sthu    r6,-4(r5)
                clrlwi  r4,r4,16
                bdnz    1b
                sthbrx  r4,r3,state     # updated sp
                NEXT

/*
 * popad: seven registers are popped (edi, esi, ebp, ebx, edx, ecx, eax) and
 * the stacked esp image is discarded. Every pass of the loop pops one dword,
 * r0 is doubled on each pass and the resulting sign steers the loop:
 *   lt: simply go on with the next register,
 *   gt: (once, after ebp) also step over the esp slot, both in the register
 *       file (r5) and on the stack (r4),
 *   eq: the register just popped was the last one (eax).
 * With 0xee000000 the sequence is lt,lt,gt,lt,lt,lt,eq: 7 pops and one skip,
 * i.e. the stack pointer advances by 32 bytes in total.
 */
popal_sp:       li      r3,SP
                lis     r0,0xee00       # mask to skip esp
                lhbrx   r4,state,r3
                addi    r5,state,32
1:              add.    r0,r0,r0
                lwzx    r6,ssb,r4
                addi    r4,r4,4
                stwu    r6,-4(r5)
                clrlwi  r4,r4,16
                blt     1b
                addi    r5,r5,-4        # step over esp in the register file
                beq     2f
                addi    r4,r4,4         # and over its image on the stack
                clrlwi  r4,r4,16
                b       1b
2:              sthbrx  r4,state,r3     # updated sp
                NEXT

/* Moves with zero or sign extension: first the special cases */
cbw:            lbz     r3,AL(state)
                extsb   r3,r3
                sthbrx  r3,AX,state
                NEXT

cwde:           lhbrx   r3,AX,state
                extsh   r3,r3
                stwbrx  r3,EAX,state
                NEXT

cwd:            lbz     r3,AH(state)
                extsb   r3,r3
                srwi    r3,r3,8         # get sign bits
                sth     r3,DX(state)
                NEXT

cdq:            lwbrx   r3,EAX,state
                srawi   r3,r3,31
                stw     r3,EDX(state)   # byte order unimportant !
                NEXT

/* The move with zero or sign extension are special since the source
and destination are not the same size. The register describing the destination
is modified to take this into account. */

movsbw:         lbzx    r3,MEM
                rlwimi  opreg,opreg,4,0x10
                extsb   r3,r3
                rlwinm  opreg,opreg,0,0x1c
                sthbrx  r3,REG
                NEXT

movsbl:         lbzx    r3,MEM
                rlwimi  opreg,opreg,4,0x10
                extsb   r3,r3
                rlwinm  opreg,opreg,0,0x1c
                stwbrx  r3,REG
                NEXT

                .equ    movsww, movw_mem_reg

movswl:         lhbrx   r3,MEM
                extsh   r3,r3
                stwbrx  r3,REG
                NEXT

movzbw:         lbzx    r3,MEM
                rlwimi  opreg,opreg,4,0x10
                rlwinm  opreg,opreg,0,0x1c
                sthbrx  r3,REG
                NEXT

movzbl:         lbzx    r3,MEM
                rlwimi  opreg,opreg,4,0x10
                rlwinm  opreg,opreg,0,0x1c
                stwbrx  r3,REG
                NEXT

                .equ    movzww, movw_mem_reg

movzwl:         lhbrx   r3,MEM
                stwbrx  r3,REG
                NEXT

/* Byte swapping */
bswap:          clrlslwi opreg,opcode,29,2      # extract reg from opcode
                lwbrx   r0,REG
                stwx    r0,REG
                NEXT

/* Input/output */
inb_port_al:    NEXTBYTE(r4)
                b       1f
inb_dx_al:      li      r4,DX
                lhbrx   r4,r4,state
1:              li      r3,code_inb
                bl      _check_port
                lwz     r3,iobase(state)
                lbzx    r5,r4,r3
                eieio
                stb     r5,AL(state)
                NEXT

inw_port_ax:    NEXTBYTE(r4)
                b       1f
inw_dx_ax:      li      r4,DX
                lhbrx   r4,r4,state
1:              li      r3,code_inw
                bl      _check_port
                lwz     r3,iobase(state)
                lhzx    r5,r4,r3
                eieio
                sth     r5,AX(state)
                NEXT

inl_port_eax:   NEXTBYTE(r4)
                b       1f
inl_dx_eax:     li      r4,DX
                lhbrx   r4,r4,state
1:              li      r3,code_inl
- bl _check_port - lwz r3,iobase(state) - lwzx r5,r4,r3 - eieio - stw r5,EAX(state) - NEXT - -outb_al_port: NEXTBYTE(r4) - b 1f -outb_al_dx: li r4,DX - lhbrx r4,r4,state -1: li r3,code_outb - bl _check_port - lwz r3,iobase(state) - lbz r5,AL(state) - stbx r5,r4,r3 - eieio - NEXT - -outw_ax_port: NEXTBYTE(r4) - b 1f -outw_ax_dx: li r4,DX - lhbrx r4,r4,state -1: li r3,code_outw - bl _check_port - lwz r3,iobase(state) - lhz r5,AX(state) - sthx r5,r4,r3 - eieio - NEXT - -outl_eax_port: NEXTBYTE(r4) - b 1f -outl_eax_dx: li r4,DX - lhbrx r4,r4,state -1: li r3,code_outl - bl _check_port - lwz r4,iobase(state) - lwz r5,EAX(state) - stwx r5,r4,r3 - eieio - NEXT - -/* Macro used for add and sub */ -#define ARITH(op,fl) \ -op##b_reg_mem: lbzx op1,MEM; SET_FLAGS(fl(B)); lbzx op2,REG; \ - op result,op1,op2; \ - stbx result,MEM; NEXT; \ -op##w_reg_mem: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,REG; \ - op result,op1,op2; \ - sthbrx result,MEM; NEXT; \ -op##l_reg_mem: lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,REG; \ - op result,op1,op2; \ - stwbrx result,MEM; NEXT; \ -op##b_mem_reg: lbzx op2,MEM; SET_FLAGS(fl(B)); lbzx op1,REG; \ - op result,op1,op2; \ - stbx result,REG; NEXT; \ -op##w_mem_reg: lhbrx op2,MEM; SET_FLAGS(fl(W)); lhbrx op1,REG; \ - op result,op1,op2; \ - sthbrx result,REG; NEXT; \ -op##l_mem_reg: lwbrx op2,MEM; SET_FLAGS(fl(L)); lwbrx op1,REG; \ - op result,op1,op2; \ - stwbrx result,REG; NEXT; \ -op##b_imm_al: addi base,state,0; li offset,AL; \ -op##b_imm: lbzx op1,MEM; SET_FLAGS(fl(B)); lbz op2,1(eip); \ - op result,op1,op2; \ - lbzu opcode,2(eip); \ - stbx result,MEM; GOTNEXT; \ -op##w_imm_ax: addi base,state,0; li offset,AX; \ -op##w_imm: lhbrx op1,MEM; SET_FLAGS(fl(W)); lhbrx op2,eip,one; \ - op result,op1,op2; \ - lbzu opcode,3(eip); \ - sthbrx result,MEM; GOTNEXT; \ -op##w_imm8: lbz op2,1(eip); SET_FLAGS(fl(W)); lhbrx op1,MEM; \ - extsb op2,op2; clrlwi op2,op2,16; \ - op result,op1,op2; \ - lbzu opcode,2(eip); \ - sthbrx result,MEM; GOTNEXT; \ 
op##l_imm_eax:  addi base,state,0; li offset,EAX; \
op##l_imm:      lwbrx op1,MEM; SET_FLAGS(fl(L)); lwbrx op2,eip,one; \
                op result,op1,op2; lbzu opcode,5(eip); \
                stwbrx result,MEM; GOTNEXT; \
op##l_imm8:     lbz op2,1(eip); SET_FLAGS(fl(L)); lwbrx op1,MEM; \
                extsb op2,op2; lbzu opcode,2(eip); \
                op result,op1,op2; \
                stwbrx result,MEM; GOTNEXT

                ARITH(add, FLAGS_ADD)
                ARITH(sub, FLAGS_SUB)

#define adc(result, op1, op2) adde result,op1,op2
#define sbb(result, op1, op2) subfe result,op2,op1

/*
 * adc/sbb. carryfor##op rebuilds the x86 carry left by the PREVIOUS
 * operation from result and op1, so op1 must not be reloaded before it has
 * returned: every variant loads op2 first, calls carryfor##op and only then
 * loads op1. As in ARITH, op1 is always the destination operand and op2 the
 * source one (this matters for sbb, which is not commutative).
 */
#define ARITH_WITH_CARRY(op, fl) \
op##b_reg_mem:  lbzx op2,REG; bl carryfor##op; lbzx op1,MEM; \
                ADD_FLAGS(fl(B)); op(result, op1, op2); \
                stbx result,MEM; NEXT; \
op##w_reg_mem:  lhbrx op2,REG; bl carryfor##op; lhbrx op1,MEM; \
                ADD_FLAGS(fl(W)); op(result, op1, op2); \
                sthbrx result,MEM; NEXT; \
op##l_reg_mem:  lwbrx op2,REG; bl carryfor##op; lwbrx op1,MEM; \
                ADD_FLAGS(fl(L)); op(result, op1, op2); \
                stwbrx result,MEM; NEXT; \
op##b_mem_reg:  lbzx op2,MEM; bl carryfor##op; lbzx op1,REG; \
                ADD_FLAGS(fl(B)); op(result, op1, op2); \
                stbx result,REG; NEXT; \
op##w_mem_reg:  lhbrx op2,MEM; bl carryfor##op; lhbrx op1,REG; \
                ADD_FLAGS(fl(W)); op(result, op1, op2); \
                sthbrx result,REG; NEXT; \
op##l_mem_reg:  lwbrx op2,MEM; bl carryfor##op; lwbrx op1,REG; \
                ADD_FLAGS(fl(L)); op(result, op1, op2); \
                stwbrx result,REG; NEXT; \
op##b_imm_al:   addi base,state,0; li offset,AL; \
op##b_imm:      lbz op2,1(eip); bl carryfor##op; lbzx op1,MEM; \
                ADD_FLAGS(fl(B)); lbzu opcode,2(eip); op(result, op1, op2); \
                stbx result,MEM; GOTNEXT; \
op##w_imm_ax:   addi base,state,0; li offset,AX; \
op##w_imm:      lhbrx op2,eip,one; bl carryfor##op; lhbrx op1,MEM; \
                ADD_FLAGS(fl(W)); lbzu opcode,3(eip); op(result, op1, op2); \
                sthbrx result,MEM; GOTNEXT; \
op##w_imm8:     lbz op2,1(eip); bl carryfor##op; lhbrx op1,MEM; \
                extsb op2,op2; ADD_FLAGS(fl(W)); clrlwi op2,op2,16; \
                lbzu opcode,2(eip); op(result, op1, op2); \
                sthbrx result,MEM; GOTNEXT; \
op##l_imm_eax:  addi
base,state,0; li offset,EAX; \
op##l_imm:      lwbrx op2,eip,one; bl carryfor##op; lwbrx op1,MEM; \
                ADD_FLAGS(fl(L)); lbzu opcode,5(eip); op(result, op1, op2); \
                stwbrx result,MEM; GOTNEXT; \
op##l_imm8:     lbz op2,1(eip); bl carryfor##op; lwbrx op1,MEM; \
                extsb op2,op2; ADD_FLAGS(fl(L)); \
                lbzu opcode,2(eip); op(result, op1, op2); \
                stwbrx result,MEM; GOTNEXT

/*
 * carryforadc/carryforsbb: called by the adc/sbb code before op1 is reloaded.
 * They evaluate the x86 carry left by the preceding operation (same sequence
 * as _eval_cf), put the carry-in expected by adde/subfe into xer[ca] and
 * leave flags holding nothing but that carry in CF_IN; the caller then adds
 * the flag descriptor of the new operation with ADD_FLAGS.
 * The polarity corrected carry has to stay in r3 since both the addic that
 * sets xer[ca] and the final rlwinm read it from there.
 * Uses r3, r4 and r5.
 */
carryforadc:    addc    r3,flags,flags          # CF_IN to xer[ca]
                RES2CF(r4)                      # get 8 or 16 bit carry
                subfe   r3,result,op1           # generate PPC carry for
                CF_ROTCNT(r5)                   # preceding operation
                addze   r3,r4                   # 32 bit carry in LSB
                CF_POL(r4,23)                   # polarity
                rlwnm   r3,r3,r5,0x100          # shift carry there
                xor     r3,r4,r3                # CF86 ? 0x100 : 0
                addic   r4,r3,0xffffff00        # set xer[ca]
                rlwinm  flags,r3,23,CF_IN
                blr

                ARITH_WITH_CARRY(adc, FLAGS_ADD)

/* for sbb the input carry must be the complement of the x86 carry */
carryforsbb:    addc    r3,flags,flags          # CF_IN to xer[ca]
                RES2CF(r4)                      # 8/16 bit carry from result
                subfe   r3,result,op1
                CF_ROTCNT(r5)
                addze   r3,r4
                CF_POL(r4,23)
                rlwnm   r3,r3,r5,0x100
                eqv     r3,r4,r3                # CF86 ?
0xfffffeff:0xffffffff - addic r4,r3,1 # set xer[ca] - rlwinm flags,r3,23,CF_IN # keep only the carry - blr - - ARITH_WITH_CARRY(sbb, FLAGS_SBB) - -cmpb_reg_mem: lbzx op1,MEM - SET_FLAGS(FLAGS_CMP(B)) - lbzx op2,REG - extsb r3,op1 - cmplw cr4,op1,op2 - extsb r4,op2 - sub result,op1,op2 - cmpw cr6,r3,r4 - NEXT - -cmpw_reg_mem: lhbrx op1,MEM - SET_FLAGS(FLAGS_CMP(W)) - lhbrx op2,REG - extsh r3,op1 - cmplw cr4,op1,op2 - extsh r4,op2 - sub result,op1,op2 - cmpw cr6,r3,r4 - NEXT - -cmpl_reg_mem: lwbrx op1,MEM - SET_FLAGS(FLAGS_CMP(L)) - lwbrx op2,REG - cmplw cr4,op1,op2 - sub result,op1,op2 - cmpw cr6,op1,op2 - NEXT - -cmpb_mem_reg: lbzx op2,MEM - SET_FLAGS(FLAGS_CMP(B)) - lbzx op1,REG - extsb r4,op2 - cmplw cr4,op1,op2 - extsb r3,op1 - sub result,op1,op2 - cmpw cr6,r3,r4 - NEXT - -cmpw_mem_reg: lhbrx op2,MEM - SET_FLAGS(FLAGS_CMP(W)) - lhbrx op1,REG - extsh r4,op2 - cmplw cr4,op1,op2 - extsh r3,op1 - sub result,op1,op2 - cmpw cr6,r3,r4 - NEXT - -cmpl_mem_reg: lwbrx op2,MEM - SET_FLAGS(FLAGS_CMP(L)) - lwbrx op1,REG - cmpw cr6,op1,op2 - sub result,op1,op2 - cmplw cr4,op1,op2 - NEXT - -cmpb_imm_al: addi base,state,0 - li offset,AL -cmpb_imm: lbzx op1,MEM - SET_FLAGS(FLAGS_CMP(B)) - lbz op2,1(eip) - extsb r3,op1 - cmplw cr4,op1,op2 - lbzu opcode,2(eip) - extsb r4,op2 - sub result,op1,op2 - cmpw cr6,r3,r4 - GOTNEXT - -cmpw_imm_ax: addi base,state,0 - li offset,AX -cmpw_imm: lhbrx op1,MEM - SET_FLAGS(FLAGS_CMP(W)) - lhbrx op2,eip,one - extsh r3,op1 - cmplw cr4,op1,op2 - lbzu opcode,3(eip) - extsh r4,op2 - sub result,op1,op2 - cmpw cr6,r3,r4 - GOTNEXT - -cmpw_imm8: lbz op2,1(eip) - SET_FLAGS(FLAGS_CMP(W)) - lhbrx op1,MEM - extsb r4,op2 - extsh r3,op1 - lbzu opcode,2(eip) - clrlwi op2,r4,16 - cmpw cr6,r3,r4 - sub result,op1,op2 - cmplw cr4,op1,op2 - GOTNEXT - -cmpl_imm_eax: addi base,state,0 - li offset,EAX -cmpl_imm: lwbrx op1,MEM - SET_FLAGS(FLAGS_CMP(L)) - lwbrx op2,eip,one - cmpw cr6,op1,op2 - lbzu opcode,5(eip) - sub result,op1,op2 - cmplw cr4,op1,op2 - GOTNEXT - 
cmpl_imm8:      lbz     op2,1(eip)
                SET_FLAGS(FLAGS_CMP(L))
                lwbrx   op1,MEM
                extsb   op2,op2
                lbzu    opcode,2(eip)
                cmpw    cr6,op1,op2
                sub     result,op1,op2
                cmplw   cr4,op1,op2
                GOTNEXT

/* Increment and decrement: the new value is kept in op2, result and op1 are
 * not touched. The store must be as wide as the load. */
incb:           lbzx    op2,MEM
                INC_FLAGS(B)
                addi    op2,op2,1
                stbx    op2,MEM
                NEXT

incw_reg:       clrlslwi opreg,opcode,29,2      # extract reg from opcode
                lhbrx   op2,REG
                INC_FLAGS(W)
                addi    op2,op2,1
                sthbrx  op2,REG
                NEXT

incw:           lhbrx   op2,MEM
                INC_FLAGS(W)
                addi    op2,op2,1
                sthbrx  op2,MEM
                NEXT

incl_reg:       clrlslwi opreg,opcode,29,2
                lwbrx   op2,REG
                INC_FLAGS(L)
                addi    op2,op2,1
                stwbrx  op2,REG                 # all 32 bits, as in incl
                NEXT

incl:           lwbrx   op2,MEM
                INC_FLAGS(L)
                addi    op2,op2,1
                stwbrx  op2,MEM
                NEXT

decb:           lbzx    op2,MEM
                DEC_FLAGS(B)
                addi    op2,op2,-1
                stbx    op2,MEM
                NEXT

decw_reg:       clrlslwi opreg,opcode,29,2      # extract reg from opcode
                lhbrx   op2,REG
                DEC_FLAGS(W)
                addi    op2,op2,-1
                sthbrx  op2,REG
                NEXT

decw:           lhbrx   op2,MEM
                DEC_FLAGS(W)
                addi    op2,op2,-1
                sthbrx  op2,MEM
                NEXT

decl_reg:       clrlslwi opreg,opcode,29,2
                lwbrx   op2,REG
                DEC_FLAGS(L)
                addi    op2,op2,-1
                stwbrx  op2,REG                 # all 32 bits, as in decl
                NEXT

decl:           lwbrx   op2,MEM
                DEC_FLAGS(L)
                addi    op2,op2,-1
                stwbrx  op2,MEM
                NEXT

/* neg is handled as 0 - operand: op1 = 0, op2 = operand, result = -operand,
 * and result is what goes back to memory. */
negb:           lbzx    op2,MEM
                SET_FLAGS(FLAGS_SUB(B))
                neg     result,op2
                li      op1,0
                stbx    result,MEM
                NEXT

negw:           lhbrx   op2,MEM
                SET_FLAGS(FLAGS_SUB(W))
                neg     result,op2
                li      op1,0
                sthbrx  result,MEM
                NEXT

negl:           lwbrx   op2,MEM
                SET_FLAGS(FLAGS_SUB(L))
                subfic  result,op2,0
                li      op1,0
                stwbrx  result,MEM
                NEXT

/* Macro used to generate code for OR/AND/XOR */
#define LOGICAL(op) \
op##b_reg_mem:  lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx op2,REG; \
                op result,op1,op2; \
                stbx result,MEM; NEXT; \
op##w_reg_mem:  lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,REG; \
                op result,op1,op2; \
                sthbrx result,MEM; NEXT; \
op##l_reg_mem:  lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,REG; \
                op result,op1,op2; \
                stwbrx result,MEM; NEXT; \
op##b_mem_reg:  lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbzx
op2,REG; \
                op result,op1,op2; \
                stbx result,REG; NEXT; \
op##w_mem_reg:  lhbrx op2,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,REG; \
                op result,op1,op2; \
                sthbrx result,REG; NEXT; \
op##l_mem_reg:  lwbrx op2,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,REG; \
                op result,op1,op2; \
                stwbrx result,REG; NEXT; \
op##b_imm_al:   addi base,state,0; li offset,AL; \
op##b_imm:      lbzx op1,MEM; SET_FLAGS(FLAGS_LOG(B)); lbz op2,1(eip); \
                op result,op1,op2; lbzu opcode,2(eip); \
                stbx result,MEM; GOTNEXT; \
op##w_imm_ax:   addi base,state,0; li offset,AX; \
op##w_imm:      lhbrx op1,MEM; SET_FLAGS(FLAGS_LOG(W)); lhbrx op2,eip,one; \
                op result,op1,op2; lbzu opcode,3(eip); \
                sthbrx result,MEM; GOTNEXT; \
op##w_imm8:     lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(W)); lhbrx op1,MEM; \
                extsb op2,op2; lbzu opcode,2(eip); \
                op result,op1,op2; \
                sthbrx result,MEM; GOTNEXT; \
op##l_imm_eax:  addi base,state,0; li offset,EAX; \
op##l_imm:      lwbrx op1,MEM; SET_FLAGS(FLAGS_LOG(L)); lwbrx op2,eip,one; \
                op result,op1,op2; lbzu opcode,5(eip); \
                stwbrx result,MEM; GOTNEXT; \
op##l_imm8:     lbz op2,1(eip); SET_FLAGS(FLAGS_LOG(L)); lwbrx op1,MEM; \
                extsb op2,op2; lbzu opcode,2(eip); \
                op result,op1,op2; \
                stwbrx result,MEM; GOTNEXT

                LOGICAL(or)

                LOGICAL(and)

                LOGICAL(xor)

/* test: same as and, but nothing is written back. The sign extended result
 * is compared with 0 in cr6 right away. */
testb_reg_mem:  lbzx    op1,MEM
                SET_FLAGS(FLAGS_TEST(B))
                lbzx    op2,REG
                and     result,op1,op2
                extsb   r3,result
                cmpwi   cr6,r3,0
                NEXT

testw_reg_mem:  lhbrx   op1,MEM
                SET_FLAGS(FLAGS_TEST(W))
                lhbrx   op2,REG
                and     result,op1,op2
                extsh   r3,result
                cmpwi   cr6,r3,0
                NEXT

/* Operands go into op1/op2, the registers the and actually reads, exactly
 * as in the byte and word variants. */
testl_reg_mem:  lwbrx   op1,MEM
                SET_FLAGS(FLAGS_TEST(L))
                lwbrx   op2,REG
                and     result,op1,op2
                cmpwi   cr6,result,0
                NEXT

testb_imm_al:   addi    base,state,0
                li      offset,AL
testb_imm:      lbzx    op1,MEM
                SET_FLAGS(FLAGS_TEST(B))
                lbz     op2,1(eip)
                and     result,op1,op2
                lbzu    opcode,2(eip)
                extsb   r3,result
                cmpwi   cr6,r3,0
                GOTNEXT

testw_imm_ax:   addi    base,state,0
                li      offset,AX
testw_imm:      lhbrx   op1,MEM
                SET_FLAGS(FLAGS_TEST(W))
                lhbrx
op2,eip,one
                and     result,op1,op2
                lbzu    opcode,3(eip)
                extsh   r3,result
                cmpwi   cr6,r3,0
                GOTNEXT

/* The operands are loaded in op1/op2, so these are the registers to and
 * (r3 and r4 hold nothing meaningful here). */
testl_imm_eax:  addi    base,state,0
                li      offset,EAX
testl_imm:      lwbrx   op1,MEM
                SET_FLAGS(FLAGS_TEST(L))
                lwbrx   op2,eip,one
                and     result,op1,op2
                lbzu    opcode,5(eip)
                cmpwi   cr6,result,0
                GOTNEXT

/* Not does not affect flags */
notb:           lbzx    r3,MEM
                xori    r3,r3,255
                stbx    r3,MEM
                NEXT

notw:           lhzx    r3,MEM
                xori    r3,r3,65535
                sthx    r3,MEM
                NEXT

notl:           lwzx    r3,MEM
                not     r3,r3
                stwx    r3,MEM
                NEXT

/* bound: r4 = signed index, r5 = lower bound, r6 = upper bound (second
 * element in memory). Out of range branches to complex with code_bound in
 * r3, otherwise the instruction behaves as a nop. */
boundw:         lhbrx   r4,REG
                li      r3,code_bound
                lhbrx   r5,MEM
                addi    offset,offset,2
                extsh   r4,r4
                lhbrx   r6,MEM
                extsh   r5,r5
                cmpw    r4,r5
                extsh   r6,r6
                blt-    complex
                cmpw    r4,r6
                ble+    nop
                b       complex

boundl:         lwbrx   r4,REG
                li      r3,code_bound
                lwbrx   r5,MEM
                addi    offset,offset,4
                lwbrx   r6,MEM
                cmpw    r4,r5
                blt-    complex
                cmpw    r4,r6
                ble+    nop
                b       complex

/* Bit test and modify instructions */

/* Common routine: bit index in op2, returns memory value in r3, mask in op2,
and of mask and value in op1. CF flag is set as with 32 bit add when bit is
non zero since result (which is cleared) will be less than op1, and in cr4,
all other flags are undefined from Intel doc. Here OF and SF are cleared
and ZF is set as a side effect of result being cleared.
*/
_setup_bitw:    cmpw    base,state
                SET_FLAGS(FLAGS_BTEST)
                extsh   op2,op2
                beq-    1f
                srawi   r4,op2,4
                add     offset,offset,r4
1:              clrlwi  op2,op2,28              # true bit index
                lhbrx   r3,MEM
                slw     op2,one,op2             # build mask
                li      result,0                # implicitly sets CF
                and     op1,r3,op2              # if result < op1
                cmplw   cr4,result,op1          # sets CF in cr4
                blr

_setup_bitl:    cmpw    base,state
                SET_FLAGS(FLAGS_BTEST)
                beq-    1f
                srawi   r4,op2,5
                add     offset,offset,r4
1:              lwbrx   r3,MEM
                rotlw   op2,one,op2             # build mask
                li      result,0
                and     op1,r3,op2
                cmplw   cr4,result,op1
                blr

/* Immediate forms bit tests are not frequent since logical are often faster */
btw_imm:        NEXTBYTE(op2)
                b       1f
btw_reg_mem:    lhbrx   op2,REG
1:              bl      _setup_bitw
                NEXT

btl_imm:        NEXTBYTE(op2)
                b       1f
btl_reg_mem:    lhbrx   op2,REG
1:              bl      _setup_bitl
                NEXT

/* btc/btr/bts: _setup_bit{w,l} returns the operand in r3 and the mask in
 * op2; the modified operand in r3 is what has to be written back (result
 * has been cleared by the setup routine and only serves for the flags). */
btcw_imm:       NEXTBYTE(op2)
                b       1f
btcw_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitw
                xor     r3,r3,op2
                sthbrx  r3,MEM
                NEXT

btcl_imm:       NEXTBYTE(op2)
                b       1f
btcl_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitl
                xor     r3,r3,op2
                stwbrx  r3,MEM
                NEXT

btrw_imm:       NEXTBYTE(op2)
                b       1f
btrw_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitw
                andc    r3,r3,op2
                sthbrx  r3,MEM
                NEXT

btrl_imm:       NEXTBYTE(op2)
                b       1f
btrl_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitl
                andc    r3,r3,op2
                stwbrx  r3,MEM
                NEXT

btsw_imm:       NEXTBYTE(op2)
                b       1f
btsw_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitw
                or      r3,r3,op2
                sthbrx  r3,MEM
                NEXT

btsl_imm:       NEXTBYTE(op2)
                b       1f
btsl_reg_mem:   lhbrx   op2,REG
1:              bl      _setup_bitl
                or      r3,r3,op2
                stwbrx  r3,MEM
                NEXT

/* Bit string search instructions, only ZF is defined after these, and the
result value is not defined when the bit field is zero.
*/ -bsfw: lhbrx result,MEM - SET_FLAGS(FLAGS_BSRCH(W)) - neg r3,result - cmpwi cr6,result,0 # sets ZF - and r3,r3,result # keep only LSB - cntlzw r3,r3 - subfic r3,r3,31 - sthbrx r3,REG - NEXT - -bsfl: lwbrx result,MEM - SET_FLAGS(FLAGS_BSRCH(L)) - neg r3,result - cmpwi cr6,result,0 # sets ZF - and r3,r3,result # keep only LSB - cntlzw r3,r3 - subfic r3,r3,31 - stwbrx r3,REG - NEXT - -bsrw: lhbrx result,MEM - SET_FLAGS(FLAGS_BSRCH(W)) - cntlzw r3,result - cmpwi cr6,result,0 - subfic r3,r3,31 - sthbrx r3,REG - NEXT - -bsrl: lwbrx result,MEM - SET_FLAGS(FLAGS_BSRCH(L)) - cntlzw r3,result - cmpwi cr6,result,0 - subfic r3,r3,31 - stwbrx r3,REG - NEXT - -/* Unconditional jumps, first the indirect than relative */ -jmpw: lhbrx eip,MEM - lbzux opcode,eip,csb - GOTNEXT - -jmpl: lwbrx eip,MEM - lbzux opcode,eip,csb - GOTNEXT - -sjmp_w: lbz r3,1(eip) - sub eip,eip,csb - addi eip,eip,2 # EIP after instruction - extsb r3,r3 - add eip,eip,r3 - clrlwi eip,eip,16 # module 64k - lbzux opcode,eip,csb - GOTNEXT - -jmp_w: lhbrx r3,eip,one # eip now off by 3 - sub eip,eip,csb - addi r3,r3,3 # compensate - add eip,eip,r3 - clrlwi eip,eip,16 - lbzux opcode,eip,csb - GOTNEXT - -sjmp_l: lbz r3,1(eip) - addi eip,eip,2 - extsb r3,r3 - lbzux opcode,eip,r3 - GOTNEXT - -jmp_l: lwbrx r3,eip,one # Simple - addi eip,eip,5 - lbzux opcode,eip,r3 - GOTNEXT - -/* The conditional jumps: although it should not happen, -byte relative jumps (sjmp) may wrap around in 16 bit mode */ - -#define NOTTAKEN_S lbzu opcode,2(eip); GOTNEXT -#define NOTTAKEN_W lbzu opcode,3(eip); GOTNEXT -#define NOTTAKEN_L lbzu opcode,5(eip); GOTNEXT - -#define CONDJMP(cond, eval, flag) \ -sj##cond##_w: EVAL_##eval; bt flag,sjmp_w; NOTTAKEN_S; \ -j##cond##_w: EVAL_##eval; bt flag,jmp_w; NOTTAKEN_W; \ -sj##cond##_l: EVAL_##eval; bt flag,sjmp_l; NOTTAKEN_S; \ -j##cond##_l: EVAL_##eval; bt flag,jmp_l; NOTTAKEN_L; \ -sjn##cond##_w: EVAL_##eval; bf flag,sjmp_w; NOTTAKEN_S; \ -jn##cond##_w: EVAL_##eval; bf flag,jmp_w; NOTTAKEN_W; \ 
sjn##cond##_l:  EVAL_##eval; bf flag,sjmp_l; NOTTAKEN_S; \
jn##cond##_l:   EVAL_##eval; bf flag,jmp_l; NOTTAKEN_L

                CONDJMP(o, OF, OF)
                CONDJMP(c, CF, CF)
                CONDJMP(z, ZF, ZF)
                CONDJMP(a, ABOVE, ABOVE)
                CONDJMP(s, SF, SF)
                CONDJMP(p, PF, PF)
                CONDJMP(g, SIGNED, SGT)
                CONDJMP(l, SIGNED, SLT)

jcxz_w:         lhz r3,CX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
jcxz_l:         lhz r3,CX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S
jecxz_w:        lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_w; NOTTAKEN_S
jecxz_l:        lwz r3,ECX(state); cmpwi r3,0; beq- sjmp_l; NOTTAKEN_S

/* Note that loop is somewhat strange, the data size attribute gives
the size of eip, and the address size whether the counter is cx or ecx.
This is the same for jcxz/jecxz. */

/* In all the loop variants the counter lives in r0: it is loaded, decremented
 * with a recording sub. (which sets cr0 for the final bne) and stored back.
 * The loopz/loopnz forms evaluate ZF in between. */
loopw_w:        li      opreg,CX
                lhbrx   r0,REG
                sub.    r0,r0,one
                sthbrx  r0,REG
                bne+    sjmp_w
                NOTTAKEN_S

loopl_w:        li      opreg,ECX
                lwbrx   r0,REG
                sub.    r0,r0,one
                stwbrx  r0,REG
                bne+    sjmp_w
                NOTTAKEN_S

loopw_l:        li      opreg,CX
                lhbrx   r0,REG
                sub.    r0,r0,one
                sthbrx  r0,REG
                bne+    sjmp_l
                NOTTAKEN_S

loopl_l:        li      opreg,ECX
                lwbrx   r0,REG
                sub.    r0,r0,one
                stwbrx  r0,REG
                bne+    sjmp_l
                NOTTAKEN_S

loopzw_w:       li      opreg,CX
                lhbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one
                sthbrx  r0,REG
                bf      ZF,1f
                bne+    sjmp_w
1:              NOTTAKEN_S

loopzl_w:       li      opreg,ECX
                lwbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one               # the counter is in r0
                stwbrx  r0,REG
                bf      ZF,1f
                bne+    sjmp_w
1:              NOTTAKEN_S

loopzw_l:       li      opreg,CX
                lhbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one
                sthbrx  r0,REG
                bf      ZF,1f
                bne+    sjmp_l
1:              NOTTAKEN_S

loopzl_l:       li      opreg,ECX
                lwbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one
                stwbrx  r0,REG
                bf      ZF,1f
                bne+    sjmp_l
1:              NOTTAKEN_S

loopnzw_w:      li      opreg,CX
                lhbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one
                sthbrx  r0,REG
                bt      ZF,1f
                bne+    sjmp_w
1:              NOTTAKEN_S

loopnzl_w:      li      opreg,ECX
                lwbrx   r0,REG
                EVAL_ZF
                sub.    r0,r0,one
                stwbrx  r0,REG
                bt      ZF,1f
                bne+    sjmp_w
1:              NOTTAKEN_S

loopnzw_l:      li      opreg,CX
                lhbrx   r0,REG
                EVAL_ZF
                sub.
r0,r0,one - sthbrx r0,REG - bt ZF,1f - bne+ sjmp_l -1: NOTTAKEN_S - -loopnzl_l: li opreg,ECX - lwbrx r0,REG - EVAL_ZF - sub. r0,r0,one - stwbrx r0,REG - bt ZF,1f - bne+ sjmp_l -1: NOTTAKEN_S - -/* Memory indirect calls are rare enough to limit code duplication */ -callw_sp_mem: lhbrx r3,MEM - sub r4,eip,csb - addi r4,r4,1 # r4 is now return address - b 1f - .equ calll_sp_mem, unimpl - -callw_sp: lhbrx r3,eip,one - sub r4,eip,csb - addi r4,r4,3 # r4 is return address - add r3,r4,r3 -1: clrlwi eip,r3,16 - li r5,SP - lhbrx r6,state,r5 # get sp - addi r6,r6,-2 - lbzux opcode,eip,csb - sthbrx r6,state,r5 # update sp - clrlwi r6,r6,16 - sthbrx r4,ssb,r6 # push return address - GOTNEXT - .equ calll_sp, unimpl - -retw_sp_imm: li opreg,SP - lhbrx r4,REG - lhbrx r6,eip,one - addi r5,r4,2 - lhbrx eip,ssb,r4 - lbzux opcode,eip,csb - add r5,r5,r6 - sthbrx r5,REG - GOTNEXT - - .equ retl_sp_imm, unimpl - -retw_sp: li opreg,SP - lhbrx r4,REG - addi r5,r4,2 - lhbrx eip,ssb,r4 - lbzux opcode,eip,csb - sthbrx r5,REG - GOTNEXT - - .equ retl_sp, unimpl - -/* Enter is a mess, and the description in Intel documents is actually wrong - * in most revisions (all PPro/PII I have but the old Pentium is Ok) ! - */ - -enterw_sp: lhbrx r0,eip,one # Stack space to allocate - li opreg,SP - lhbrx r3,REG # SP - li r7,BP - lbzu r4,3(eip) # nesting level - addi r3,r3,-2 - lhbrx r5,state,r7 # Original BP - clrlwi r3,r3,16 - sthbrx r5,ssb,r3 # Push BP - andi. 
r4,r4,31 # modulo 32 and test - mr r6,r3 # Save frame pointer to temp - beq 3f - mtctr r4 # iterate level-1 times - b 2f -1: addi r5,r5,-2 # copy list of frame pointers - clrlwi r5,r5,16 - lhzx r4,ssb,r5 - addi r3,r3,-2 - clrlwi r3,r3,16 - sthx r4,ssb,r3 -2: bdnz 1b - addi r3,r3,-2 # save current frame pointer - clrlwi r3,r3,16 - sthbrx r6,ssb,r3 -3: sthbrx r6,state,r7 # New BP - sub r3,r3,r0 - sthbrx r3,REG # Save new stack pointer - NEXT - - .equ enterl_sp, unimpl - -leavew_sp: li opreg,BP - lhbrx r3,REG # Stack = BP - addi r4,r3,2 # - lhzx r3,ssb,r3 - li opreg,SP - sthbrx r4,REG # New Stack - sth r3,BP(state) # Popped BP - NEXT - - .equ leavel_sp, unimpl - -/* String instructions: first a generic setup routine, which exits early -if there is a repeat prefix with a count of 0. On return offset=SI, opreg=DI, -ctr=CX (or 1 without prefix), r7=stride (+1 or -1 depending on DF) and cr0 -still holds the prefix test (lt=repnz, gt=repz, eq=none) which the cmps/scas -handlers and _finish_strw rely on: the CX test must therefore not use cr0. */ -#define STRINGSRC base,offset -#define STRINGDST esb,opreg - -_setup_stringw: li offset,SI # - rlwinm. r3,opcode,19,0,1 # lt=repnz, gt= repz, eq none - li opreg,DI - lhbrx offset,state,offset # load si - li r3,1 # no repeat - lhbrx opreg,state,opreg # load di - beq 1f # no repeat - li r3,CX - lhbrx r3,state,r3 # load CX - cmpwi cr1,r3,0 # cr1 is scratch, keep prefix type in cr0 - beq cr1,nop # early exit here ! 
-1: mtctr r3 # ctr=CX or 1 - li r7,1 # stride - bflr+ DF - li r7,-1 # change stride sign - blr - -/* Ending routine to update all changed registers (goes directly to NEXT) */ -_finish_strw: li r4,SI - sthbrx offset,state,r4 # update si - li r4,DI - sthbrx opreg,state,r4 # update di - beq nop - mfctr r3 - li r4,CX - sthbrx r3,state,r4 # update cx - NEXT - -lodsb_a16: bl _setup_stringw -1: lbzx r0,STRINGSRC # [rep] lodsb - add offset,offset,r7 - clrlwi offset,offset,16 - bdnz 1b - stb r0,AL(state) - b _finish_strw - -lodsw_a16: bl _setup_stringw - slwi r7,r7,1 -1: lhzx r0,STRINGSRC # [rep] lodsw - add offset,offset,r7 - clrlwi offset,offset,16 - bdnz 1b - sth r0,AX(state) - b _finish_strw - -lodsl_a16: bl _setup_stringw - slwi r7,r7,2 -1: lwzx r0,STRINGSRC # [rep] lodsl - add offset,offset,r7 - clrlwi offset,offset,16 - bdnz 1b - stw r0,EAX(state) - b _finish_strw - -stosb_a16: bl _setup_stringw - lbz r0,AL(state) -1: stbx r0,STRINGDST # [rep] stosb - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -stosw_a16: bl _setup_stringw - lhz r0,AX(state) - slwi r7,r7,1 -1: sthx r0,STRINGDST # [rep] stosw - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -stosl_a16: bl _setup_stringw - lwz r0,EAX(state) - slwi r7,r7,2 -1: stwx r0,STRINGDST # [rep] stosl - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -movsb_a16: bl _setup_stringw -1: lbzx r0,STRINGSRC # [rep] movsb - add offset,offset,r7 - stbx r0,STRINGDST - clrlwi offset,offset,16 - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -movsw_a16: bl _setup_stringw - slwi r7,r7,1 -1: lhzx r0,STRINGSRC # [rep] movsw - add offset,offset,r7 - sthx r0,STRINGDST - clrlwi offset,offset,16 - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -movsl_a16: bl _setup_stringw - slwi r7,r7,2 -1: lwzx r0,STRINGSRC # [rep] movsl - add offset,offset,r7 - stwx r0,STRINGDST - clrlwi offset,offset,16 - add opreg,opreg,r7 - clrlwi 
opreg,opreg,16 - bdnz 1b - b _finish_strw - -/* At least on a Pentium, repeated string I/O instructions check for -access port permission even if count is 0 ! So the order of the check is not -important. */ -insb_a16: li r4,DX - li r3,code_insb_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz base,iobase(state) -1: lbzx r0,base,r4 # [rep] insb - eieio - stbx r0,STRINGDST - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -insw_a16: li r4,DX - li r3,code_insw_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz base,iobase(state) - slwi r7,r7,1 -1: lhzx r0,base,r4 # [rep] insw - eieio - sthx r0,STRINGDST - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -insl_a16: li r4,DX - li r3,code_insl_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz base,iobase(state) - slwi r7,r7,2 -1: lwzx r0,base,r4 # [rep] insl - eieio - stwx r0,STRINGDST - add opreg,opreg,r7 - clrlwi opreg,opreg,16 - bdnz 1b - b _finish_strw - -/* outs: r4 = port number read from DX, r6 = iobase value from the state */ -outsb_a16: li r4,DX - li r3,code_outsb_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz r6,iobase(state) -1: lbzx r0,STRINGSRC # [rep] outsb - add offset,offset,r7 - stbx r0,r6,r4 - clrlwi offset,offset,16 - eieio - bdnz 1b - b _finish_strw - -outsw_a16: li r4,DX - li r3,code_outsw_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz r6,iobase(state) - slwi r7,r7,1 -1: lhzx r0,STRINGSRC # [rep] outsw - add offset,offset,r7 - sthx r0,r6,r4 - clrlwi offset,offset,16 - eieio - bdnz 1b - b _finish_strw - -outsl_a16: li r4,DX - li r3,code_outsl_a16 - lhbrx r4,state,r4 - bl _check_port - bl _setup_stringw - lwz r6,iobase(state) - slwi r7,r7,2 -1: lwzx r0,STRINGSRC # [rep] outsl - add offset,offset,r7 - stwx r0,r6,r4 - clrlwi offset,offset,16 - eieio - bdnz 1b - b _finish_strw - -cmpsb_a16: bl _setup_stringw - SET_FLAGS(FLAGS_CMP(B)) - blt 3f # repnz prefix -1: lbzx op1,STRINGSRC # [repz] cmpsb - add offset,offset,r7 - lbzx 
op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzt CF+2,1b -2: extsb r3,op1 - extsb r4,op2 - cmpw cr6,r3,r4 - sub result,op1,op2 - b _finish_strw - -3: lbzx op1,STRINGSRC # repnz cmpsb - add offset,offset,r7 - lbzx op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzf CF+2,3b - b 2b - -cmpsw_a16: bl _setup_stringw - SET_FLAGS(FLAGS_CMP(W)) - slwi r7,r7,1 - blt 3f # repnz prefix -1: lhbrx op1,STRINGSRC # [repz] cmpsw - add offset,offset,r7 - lhbrx op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzt CF+2,1b -2: extsh r3,op1 - extsh r4,op2 - cmpw cr6,r3,r4 - sub result,op1,op2 - b _finish_strw - -3: lhbrx op1,STRINGSRC # repnz cmpsw - add offset,offset,r7 - lhbrx op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzf CF+2,3b - b 2b - -cmpsl_a16: bl _setup_stringw - SET_FLAGS(FLAGS_CMP(L)) - slwi r7,r7,2 - blt 3f # repnz prefix -1: lwbrx op1,STRINGSRC # [repz] cmpsl - add offset,offset,r7 - lwbrx op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzt CF+2,1b -2: cmpw cr6,op1,op2 - sub result,op1,op2 - b _finish_strw - -3: lwbrx op1,STRINGSRC # repnz cmpsl - add offset,offset,r7 - lwbrx op2,STRINGDST - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi offset,offset,16 - clrlwi opreg,opreg,16 - bdnzf CF+2,3b - b 2b - -scasb_a16: bl _setup_stringw - lbzx op1,AL,state # AL - SET_FLAGS(FLAGS_CMP(B)) - bgt 3f # repz prefix -1: lbzx op2,STRINGDST # [repnz] scasb - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzf CF+2,1b -2: extsb r3,op1 - extsb r4,op2 - cmpw cr6,r3,r4 - sub result,op1,op2 - b _finish_strw - -3: lbzx op2,STRINGDST # repz scasb - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzt CF+2,3b - b 2b - -scasw_a16: bl _setup_stringw - 
lhbrx op1,AX,state - SET_FLAGS(FLAGS_CMP(W)) - slwi r7,r7,1 - bgt 3f # repz prefix -1: lhbrx op2,STRINGDST # [repnz] scasw - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzf CF+2,1b -2: extsh r3,op1 - extsh r4,op2 - cmpw cr6,r3,r4 - sub result,op1,op2 - b _finish_strw - -3: lhbrx op2,STRINGDST # repz scasw - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzt CF+2,3b - b 2b - -scasl_a16: bl _setup_stringw - lwbrx op1,EAX,state - SET_FLAGS(FLAGS_CMP(L)) - slwi r7,r7,2 - bgt 3f # repz prefix -1: lwbrx op2,STRINGDST # [repnz] scasl - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzf CF+2,1b -2: cmpw cr6,op1,op2 - sub result,op1,op2 - b _finish_strw - -3: lwbrx op2,STRINGDST # repz scasl - add opreg,opreg,r7 - cmplw cr4,op1,op2 - clrlwi opreg,opreg,16 - bdnzt CF+2,3b - b 2b - - .equ lodsb_a32, unimpl - .equ lodsw_a32, unimpl - .equ lodsl_a32, unimpl - .equ stosb_a32, unimpl - .equ stosw_a32, unimpl - .equ stosl_a32, unimpl - .equ movsb_a32, unimpl - .equ movsw_a32, unimpl - .equ movsl_a32, unimpl - .equ insb_a32, unimpl - .equ insw_a32, unimpl - .equ insl_a32, unimpl - .equ outsb_a32, unimpl - .equ outsw_a32, unimpl - .equ outsl_a32, unimpl - .equ cmpsb_a32, unimpl - .equ cmpsw_a32, unimpl - .equ cmpsl_a32, unimpl - .equ scasb_a32, unimpl - .equ scasw_a32, unimpl - .equ scasl_a32, unimpl - -xlatb_a16: li offset,BX - lbz r3,AL(state) - lhbrx offset,offset,state - add r3,r3,base - lbzx r3,r3,offset - stb r3,AL(state) - NEXT - - .equ xlatb_a32, unimpl - -/* - * Shift and rotates: note the oddity that rotates do not affect SF/ZF/AF/PF - * but shifts do. Also testing has indicated that rotates with a count of zero - * do not affect any flag. The documentation specifies this for shifts but - * is more obscure for rotates. The overflow flag setting is only specified - * when count is 1, otherwise OF is undefined which simplifies emulation. 
- */ - -/* - * The rotates through carry are among the most difficult instructions, - * they are implemented as a shift of 2*n+some bits depending on case. - * First the left rotates through carry. - */ - -/* Byte rcl is performed on 18 bits (17 actually used) in a single register */ -rclb_imm: NEXTBYTE(r3) - b 1f -rclb_cl: lbz r3,CL(state) - b 1f -rclb_1: li r3,1 -1: lbzx r0,MEM - andi. r3,r3,31 # count%32 - addc r4,flags,flags # CF_IN->xer[ca] - RES2CF(r6) - subfe r4,result,op1 - mulli r5,r3,29 # 29=ceil(256/9) - CF_ROTCNT(r7) - addze r6,r6 - CF_POL_INSERT(r0,23) - srwi r5,r5,8 # count/9 - rlwnm r6,r6,r7,0x100 - xor r0,r0,r6 # (23)0:CF:data8 - rlwimi r5,r5,3,26,28 # 9*(count/9) - rlwimi r0,r0,23,0,7 # CF:(data8):(14)0:CF:data8 - sub r3,r3,r5 # count%9 - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - rlwnm r0,r0,r3,0x000001ff # (23)0:NewCF:Result8 - rlwimi flags,r0,19,CF_VALUE - stbx r0,MEM - rlwimi flags,r0,18,OF_XOR - NEXT - -/* Word rcl is performed on 33 bits (CF:data16:CF:(15 MSB of data16) */ -rclw_imm: NEXTBYTE(r3) - b 1f -rclw_cl: lbz r3,CL(state) - b 1f -rclw_1: li r3,1 -1: lhbrx r0,MEM - andi. r3,r3,31 # count=count%32 - addc r4,flags,flags - RES2CF(r6) - subfe r4,result,op1 - addi r5,r3,15 # modulo 17: >=32 if >=17 - CF_ROTCNT(r7) - addze r6,r6 - addi r7,r7,8 - CF_POL_INSERT(r0,15) - srwi r5,r5,5 # count/17 - rlwnm r6,r6,r7,0x10000 - rlwimi r5,r5,4,27,27 # 17*(count/17) - xor r0,r0,r6 # (15)0:CF:data16 - sub r3,r3,r5 # count%17 - rlwinm r4,r0,15,0xffff0000 # CF:(15 MSB of data16):(16)0 - slw r0,r0,r3 # New carry and MSBs - rlwnm r4,r4,r3,16,31 # New LSBs - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - add r0,r0,r4 # result - rlwimi flags,r0,11,CF_VALUE - sthbrx r0,MEM - rlwimi flags,r0,10,OF_XOR - NEXT - -/* Longword rcl only needs 64 bits because the maximum rotate count is 31 ! */ -rcll_imm: NEXTBYTE(r3) - b 1f -rcll_cl: lbz r3,CL(state) - b 1f -rcll_1: li r3,1 -1: lwbrx r0,MEM - andi. 
r3,r3,31 # count=count%32 - addc r4,flags,flags # CF_IN->xer[ca] as in the other rcl/rcr (r4 is not set yet) - RES2CF(r6) - subfe r4,result,op1 - CF_ROTCNT(r7) - addze r6,r6 - srwi r4,r0,1 # 0:(31 MSB of data32) - addi r7,r7,23 - CF_POL_INSERT(r4,0) - rlwnm r6,r6,r7,0,0 - beq- nop # no flags changed if count 0 - subfic r5,r3,32 - xor r4,r4,r6 - ROTATE_FLAGS - slw r0,r0,r3 # New MSBs - srw r5,r4,r5 # New LSBs - rlwnm r4,r4,r3,0,0 # New Carry - add r0,r0,r5 # result - rlwimi flags,r4,28,CF_VALUE - rlwimi flags,r0,27,OF_XOR - stwbrx r0,MEM - NEXT - -/* right rotates through carry are even worse because PPC only has a left -rotate instruction. Somewhat tough when combined with modulo 9, 17, or -33 operation and the rules of OF and CF flag settings. */ -/* Byte rcr is performed on 17 bits */ -rcrb_imm: NEXTBYTE(r3) - b 1f -rcrb_cl: lbz r3,CL(state) - b 1f -rcrb_1: li r3,1 -1: lbzx r0,MEM - andi. r3,r3,31 # count%32 - addc r4,flags,flags # cf_in->xer[ca] - RES2CF(r6) - mulli r5,r3,29 # 29=ceil(256/9) - subfe r4,result,op1 - CF_ROTCNT(r7) - addze r6,r6 - CF_POL_INSERT(r0,23) - srwi r5,r5,8 # count/9 - rlwimi r0,r0,9,0x0001fe00 # (15)0:data8:0:data8 - rlwnm r6,r6,r7,0x100 - rlwimi r5,r5,3,26,28 # 9*(count/9) - xor r0,r0,r6 # (15)0:data8:CF:data8 - sub r3,r3,r5 # count%9 - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - srw r0,r0,r3 # (23)junk:NewCF:Result8 - rlwimi flags,r0,19,CF_VALUE|OF_XOR - stbx r0,MEM - NEXT - -/* Word rcr is a 33 bit right shift with a quirk, because the 33rd bit -is only needed when the rotate count is 16 and rotating left or right -by 16 a 32 bit quantity is the same ! */ -rcrw_imm: NEXTBYTE(r3) - b 1f -rcrw_cl: lbz r3,CL(state) - b 1f -rcrw_1: li r3,1 -1: lhbrx r0,MEM - andi. 
r3,r3,31 # count%32 - addc r4,flags,flags # cf_in->xer[ca] - RES2CF(r6) - subfe r4,result,op1 - addi r5,r3,15 # >=32 if >=17 - CF_ROTCNT(r7) - addze r6,r6 - addi r7,r7,8 - CF_POL_INSERT(r0,15) - srwi r5,r5,5 # count/17 - rlwnm r6,r6,r7,0x10000 - rlwinm r7,r0,16,0x01 # MSB of data16 - rlwimi r0,r0,17,0xfffe0000 # (15 MSB of data16):0:data16 - rlwimi r5,r5,4,27,27 # 17*(count/17) - xor r0,r0,r6 # (15 MSB of data16):CF:data16 - sub r3,r3,r5 # count%17 - beq- nop # no flags changed if count 0 - srw r0,r0,r3 # shift right - rlwnm r7,r7,r3,0x10000 # just in case count=16 - ROTATE_FLAGS - add r0,r0,r7 # junk15:NewCF:result16 - rlwimi flags,r0,11,CF_VALUE|OF_XOR - sthbrx r0,MEM - NEXT - -/* Longword rcr need only 64 bits since the rotate count is limited to 31 */ -rcrl_imm: NEXTBYTE(r3) - b 1f -rcrl_cl: lbz r3,CL(state) - b 1f -rcrl_1: li r3,1 -1: lwbrx r0,MEM - andi. r3,r3,31 # count%32 - addc r4,flags,flags - RES2CF(r6) - subfe r4,result,op1 - CF_ROTCNT(r7) - slwi r4,r0,1 # (31MSB of data32):0 - addze r6,r6 - addi r7,r7,24 - CF_POL_INSERT(r4,31) - rlwnm r6,r6,r7,0x01 - beq- nop # no flags changed if count 0 - subfic r7,r3,32 - xor r4,r4,r6 - srw r0,r0,r3 # Result LSB - slw r5,r4,r7 # Result MSB - srw r4,r4,r3 # NewCF in LSB - add r0,r0,r5 # result - rlwimi flags,r4,27,CF_VALUE - stwbrx r0,MEM - rlwimi flags,r0,27,OF_XOR - NEXT - -/* After the rotates through carry, normal rotates are so simple ! */ -rolb_imm: NEXTBYTE(r3) - b 1f -rolb_cl: lbz r3,CL(state) - b 1f -rolb_1: li r3,1 -1: lbzx r0,MEM - andi. r4,r3,31 # count%32 == 0 ? - clrlwi r3,r3,29 # count%8 - rlwimi r0,r0,24,0xff000000 # replicate for shift in - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - rotlw r0,r0,r3 - rlwimi flags,r0,27,CF_VALUE # New CF - stbx r0,MEM - rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) - NEXT - -rolw_imm: NEXTBYTE(r3) - b 1f -rolw_cl: lbz r3,CL(state) - b 1f -rolw_1: li r3,1 -1: lhbrx r0,MEM - andi. 
r3,r3,31 - rlwimi r0,r0,16,0,15 # duplicate - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - rotlw r0,r0,r3 # result word duplicated - rlwimi flags,r0,27,CF_VALUE # New CF - sthbrx r0,MEM - rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) - NEXT - -roll_imm: NEXTBYTE(r3) - b 1f -roll_cl: lbz r3,CL(state) - b 1f -roll_1: li r3,1 -1: lwbrx r0,MEM - andi. r3,r3,31 - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - rotlw r0,r0,r3 # result - rlwimi flags,r0,27,CF_VALUE # New CF - stwbrx r0,MEM - rlwimi flags,r0,26,OF_XOR # New OF (CF xor MSB) - NEXT - -rorb_imm: NEXTBYTE(r3) - b 1f -rorb_cl: lbz r3,CL(state) - b 1f -rorb_1: li r3,1 -1: lbzx r0,MEM - andi. r4,r3,31 # count%32 == 0 ? - clrlwi r3,r3,29 # count%8 - rlwimi r0,r0,8,0x0000ff00 # replicate for shift in - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - srw r0,r0,r3 - rlwimi flags,r0,20,CF_VALUE - stbx r0,MEM - rlwimi flags,r0,19,OF_XOR - NEXT - -rorw_imm: NEXTBYTE(r3) - b 1f -rorw_cl: lbz r3,CL(state) - b 1f -rorw_1: li r3,1 -1: lhbrx r0,MEM - andi. r4,r3,31 - clrlwi r3,r3,28 # count %16 - rlwimi r0,r0,16,0xffff0000 # duplicate - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - srw r0,r0,r3 # junk16:result16 - rlwimi flags,r0,12,CF_VALUE - sthbrx r0,MEM - rlwimi flags,r0,11,OF_XOR - NEXT - -rorl_imm: NEXTBYTE(r3) - b 1f -rorl_cl: lbz r3,CL(state) - b 1f -rorl_1: li r3,1 -1: lwbrx r0,MEM - andi. r4,r3,31 - neg r3,r3 - beq- nop # no flags changed if count 0 - ROTATE_FLAGS - rotlw r0,r0,r3 # result - rlwimi flags,r0,28,CF_VALUE - stwbrx r0,MEM - rlwimi flags,r0,27,OF_XOR - NEXT - -/* Right arithmetic shifts: they clear OF whenever count!=0 */ -#define SAR_FLAGS CF_ZERO|OF_ZERO|RESL -sarb_imm: NEXTBYTE(r3) - b 1f -sarb_cl: lbz r3,CL(state) - b 1f -sarb_1: li r3,1 -1: lbzx r4,MEM - andi. 
r3,r3,31 - addi r5,r3,-1 - extsb r4,r4 - beq- nop # no flags changed if count 0 - SET_FLAGS(SAR_FLAGS) - sraw result,r4,r3 - srw r5,r4,r5 - stbx result,MEM - rlwimi flags,r5,27,CF_VALUE - NEXT - -sarw_imm: NEXTBYTE(r3) - b 1f -sarw_cl: lbz r3,CL(state) - b 1f -sarw_1: li r3,1 -1: lhbrx r4,MEM - andi. r3,r3,31 - addi r5,r3,-1 - extsh r4,r4 - beq- nop # no flags changed if count 0 - SET_FLAGS(SAR_FLAGS) - sraw result,r4,r3 - srw r5,r4,r5 - sthbrx result,MEM - rlwimi flags,r5,27,CF_VALUE - NEXT - -sarl_imm: NEXTBYTE(r3) - b 1f -sarl_cl: lbz r3,CL(state) - b 1f -sarl_1: li r3,1 -1: lwbrx r4,MEM - andi. r3,r3,31 - addi r5,r3,-1 - beq- nop # no flags changed if count 0 - SET_FLAGS(SAR_FLAGS) - sraw result,r4,r3 - srw r5,r4,r5 - stwbrx result,MEM - rlwimi flags,r5,27,CF_VALUE - NEXT - -/* Left shifts are quite easy: they use the flag mechanism of add */ -shlb_imm: NEXTBYTE(r3) - b 1f -shlb_cl: lbz r3,CL(state) - b 1f -shlb_1: li r3,1 -1: andi. r3,r3,31 - beq- nop # no flags changed if count 0 - lbzx op1,MEM - SET_FLAGS(FLAGS_ADD(B)) - slw result,op1,r3 - addi op2,op1,0 # for OF computation only ! - stbx result,MEM - NEXT - -shlw_imm: NEXTBYTE(r3) - b 1f -shlw_cl: lbz r3,CL(state) - b 1f -shlw_1: li r3,1 -1: andi. r3,r3,31 - beq- nop # no flags changed if count 0 - lhbrx op1,MEM - SET_FLAGS(FLAGS_ADD(W)) - slw result,op1,r3 - addi op2,op1,0 # for OF computation only ! - sthbrx result,MEM - NEXT - -/* That one may be wrong */ -shll_imm: NEXTBYTE(r3) - b 1f -shll_cl: lbz r3,CL(state) - b 1f -shll_1: li r3,1 -1: andi. r3,r3,31 - beq- nop # no flags changed if count 0 - lwbrx op1,MEM - addi r4,r3,-1 - SET_FLAGS(FLAGS_ADD(L)) - slw result,op1,r3 - addi op2,op1,0 # for OF computation only ! - slw op1,op1,r4 # for CF computation - stwbrx result,MEM - NEXT - -/* Right shifts are quite complex, because of funny flag rules ! */ -shrb_imm: NEXTBYTE(r3) - b 1f -shrb_cl: lbz r3,CL(state) - b 1f -shrb_1: li r3,1 -1: andi. 
r3,r3,31 - beq- nop # no flags changed if count 0 - lbzx op1,MEM - addi r4,r3,-1 - SET_FLAGS(FLAGS_SHR(B)) - srw result,op1,r3 - srw r4,op1,r4 - li op2,-1 # for OF computation only ! - stbx result,MEM - rlwimi flags,r4,27,CF_VALUE # Set CF - NEXT - -shrw_imm: NEXTBYTE(r3) - b 1f -shrw_cl: lbz r3,CL(state) - b 1f -shrw_1: li r3,1 -1: andi. r3,r3,31 - beq- nop # no flags changed if count 0 - lhbrx op1,MEM - addi r4,r3,-1 - SET_FLAGS(FLAGS_SHR(W)) - srw result,op1,r3 - srw r4,op1,r4 - li op2,-1 # for OF computation only ! - sthbrx result,MEM - rlwimi flags,r4,27,CF_VALUE # Set CF - NEXT - -shrl_imm: NEXTBYTE(r3) - b 1f -shrl_cl: lbz r3,CL(state) - b 1f -shrl_1: li r3,1 -1: andi. r3,r3,31 - beq- nop # no flags changed if count 0 - lwbrx op1,MEM - addi r4,r3,-1 - SET_FLAGS(FLAGS_SHR(L)) - srw result,op1,r3 - srw r4,op1,r4 - li op2,-1 # for OF computation only ! - stwbrx result,MEM - rlwimi flags,r4,27,CF_VALUE # Set CF - NEXT - -/* Double length shifts, shldw uses FLAGS_ADD for simplicity */ -shldw_imm: NEXTBYTE(r3) - b 1f -shldw_cl: lbz r3,CL(state) -1: andi. r3,r3,31 - beq- nop - lhbrx op1,MEM - SET_FLAGS(FLAGS_ADD(W)) - lhbrx op2,REG - rlwimi op1,op2,16,0,15 # op2:op1 - addi op2,op1,0 - rotlw result,op1,r3 - sthbrx result,MEM - NEXT - -shldl_imm: NEXTBYTE(r3) - b 1f -shldl_cl: lbz r3,CL(state) -1: andi. r3,r3,31 - beq- nop - lwbrx op1,MEM - SET_FLAGS(FLAGS_DBLSH(L)) - lwbrx op2,REG - subfic r4,r3,32 - slw result,op1,r3 - srw r4,op2,r4 - rotlw r3,op1,r3 - or result,result,r4 - addi op2,op1,0 - rlwimi flags,r3,27,CF_VALUE - stwbrx result,MEM - NEXT - -shrdw_imm: NEXTBYTE(r3) - b 1f -shrdw_cl: lbz r3,CL(state) -1: andi. r3,r3,31 - beq- nop - lhbrx op1,MEM - SET_FLAGS(FLAGS_DBLSH(W)) - lhbrx op2,REG - addi r4,r3,-1 - rlwimi op1,op2,16,0,15 # op2:op1 - addi op2,op1,0 - srw result,op1,r3 - srw r4,op1,r4 - sthbrx result,MEM - rlwimi flags,r4,27,CF_VALUE - NEXT - -shrdl_imm: NEXTBYTE(r3) - b 1f -shrdl_cl: lbz r3,CL(state) -1: andi. 
r3,r3,31 - beq- nop - lwbrx op1,MEM - SET_FLAGS(FLAGS_DBLSH(L)) - lwbrx op2,REG - subfic r4,r3,32 - srw result,op1,r3 - addi r3,r3,-1 - slw r4,op2,r4 - srw r3,op1,r3 - or result,result,r4 - addi op2,op1,0 - rlwimi flags,r3,27,CF_VALUE - stwbrx result,MEM - NEXT - -/* One operand multiplies: with result double the operand size, unsigned */ -mulb: lbzx op2,MEM - lbz op1,AL(state) - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - subfic r3,result,255 - sthbrx result,AX,state - rlwimi flags,r3,0,CF_VALUE|OF_VALUE - NEXT - -mulw: lhbrx op2,MEM - lhbrx op1,AX,state - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - li r4,DX - srwi r3,result,16 - sthbrx result,AX,state - neg r5,r3 - sthbrx r3,r4,state # DX - rlwimi flags,r5,0,CF_VALUE|OF_VALUE - NEXT - -mull: lwbrx op2,MEM - lwbrx op1,EAX,state - mullw result,op1,op2 - mulhwu. r3,op1,op2 - SET_FLAGS(FLAGS_MUL) - stwbrx result,EAX,state - li r4,EDX - stwbrx r3,r4,state - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT - -/* One operand multiplies: with result double the operand size, signed */ -imulb: lbzx op2,MEM - extsb op2,op2 - lbz op1,AL(state) - extsb op1,op1 - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - extsb r3,result - sthbrx result,AX,state - cmpw r3,result - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT - -imulw: lhbrx op2,MEM - extsh op2,op2 - lhbrx op1,AX,state - extsh op1,op1 - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - li r3,DX - extsh r4,result - srwi r5,result,16 - sthbrx result,AX,state - cmpw r4,result - sthbrx r5,r3,state - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT - -imull: lwbrx op2,MEM - SET_FLAGS(FLAGS_MUL) - lwbrx op1,EAX,state - li r3,EDX - mulhw r4,op1,op2 - mullw result,op1,op2 - stwbrx r4,r3,state - srawi r3,result,31 - cmpw r3,r4 - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT - -/* Other multiplies */ -imulw_mem_reg: lhbrx op2,REG - extsh op2,op2 - b 1f - -imulw_imm: NEXTWORD(op2) - extsh op2,op2 - b 1f - -imulw_imm8: NEXTBYTE(op2) - extsb op2,op2 -1: 
lhbrx op1,MEM - extsh op1,op1 - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - extsh r3,result - sthbrx result,REG - cmpw r3,result - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT # SF/ZF/AF/PF undefined ! - -imull_mem_reg: lwbrx op2,REG - b 1f - -imull_imm: NEXTDWORD(op2) - b 1f - -imull_imm8: NEXTBYTE(op2) - extsb op2,op2 -1: lwbrx op1,MEM - mullw result,op1,op2 - SET_FLAGS(FLAGS_MUL) - mulhw r3,op1,op2 - srawi r4,result,31 - stwbrx result,REG - cmpw r3,r4 - beq+ nop - oris flags,flags,(CF_SET|OF_SET)>>16 - NEXT # SF/ZF/AF/PF undefined ! - -/* aad is indeed a multiply */ -aad: NEXTBYTE(r3) - lbz op1,AH(state) - lbz op2,AL(state) - mullw result,op1,r3 # AH*imm - SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from result - add result,result,op2 # AH*imm+AL - slwi r3,result,8 - sth r3,AX(state) # AH=0 - NEXT # OF/AF/CF undefined - -/* Unsigned divides: we may destroy all flags */ -divb: lhbrx r4,AX,state - lbzx r3,MEM - srwi r5,r4,8 - cmplw r5,r3 - bnl- _divide_error - divwu r5,r4,r3 - mullw r3,r5,r3 - sub r3,r4,r3 - stb r5,AL(state) - stb r3,AH(state) - NEXT - -divw: li opreg,DX - lhbrx r4,AX,state - lhbrx r5,REG - lhbrx r3,MEM - insrwi r4,r5,16,0 - cmplw r5,r3 - bnl- _divide_error - divwu r5,r4,r3 - mullw r3,r5,r3 - sub r3,r4,r3 - sthbrx r5,AX,state - sthbrx r3,REG - NEXT - -divl: li opreg,EDX # Not yet fully implemented - lwbrx r3,MEM - lwbrx r4,REG - lwbrx r5,EAX,state - cmplw r4,r3 - bnl- _divide_error - cmplwi r4,0 - bne- 1f - divwu r4,r5,r3 - mullw r3,r4,r3 - stwbrx r4,EAX,state - sub r3,r5,r3 - stwbrx r3,REG - NEXT -/* full implementation of 64:32 unsigned divide, slow but rarely used */ -1: bl _div_64_32 - stwbrx r5,EAX,state - stwbrx r4,REG - NEXT -/* - * Divide r4:r5 by r3, quotient in r5, remainder in r4. - * The algorithm is stupid because it won't be used very often. 
- * In: r4:r5 = dividend, r3 = divisor. Clobbers r7, ctr and cr0. - * Called with bl, so it must return with blr. - */ -_div_64_32: li r7,32 - mtctr r7 -1: cmpwi r4,0 # always subtract in case - addc r5,r5,r5 # MSB is set - adde r4,r4,r4 - blt 2f - cmplw r4,r3 - blt 3f -2: sub r4,r4,r3 - addi r5,r5,1 -3: bdnz 1b - blr # return to divl/idivl, do not fall into idivb - -/* Signed divides: we may destroy all flags */ -idivb: lbzx r3,MEM - lhbrx r4,AX,state - extsb r3,r3 # divisor is a signed byte - extsh r4,r4 # dividend (AX) is a signed word - cmpwi r3,0 - beq- _divide_error - divw r5,r4,r3 - extsb r7,r5 - mullw r3,r5,r3 - cmpw r5,r7 # quotient must fit in a signed byte - sub r3,r4,r3 - bne- _divide_error - stb r5,AL(state) - stb r3,AH(state) - NEXT - -idivw: li opreg,DX - lhbrx r4,AX,state - lhbrx r5,REG - lhbrx r3,MEM - insrwi r4,r5,16,0 # r4=DX:AX, already a signed 32 bit value - extsh r3,r3 # divisor is a signed word - cmpwi r3,0 - beq- _divide_error - divw r5,r4,r3 - extsh r7,r5 - mullw r3,r5,r3 - cmpw r5,r7 # quotient must fit in a signed word - sub r3,r4,r3 - bne- _divide_error - sthbrx r5,AX,state - sthbrx r3,REG - NEXT - -idivl: li opreg,EDX # Not yet fully implemented - lwbrx r3,MEM - lwbrx r5,EAX,state - cmpwi cr1,r3,0 - lwbrx r4,REG - srwi r7,r5,31 - beq- cr1,_divide_error # zero divisor: test the cr1 compare above - add. r7,r7,r4 - bne- 1f # EDX not sign extension of EAX - divw r4,r5,r3 - xoris r7,r5,0x8000 # only overflow case is - orc. r7,r7,r3 # 0x80000000 divided by -1 - mullw r3,r4,r3 - beq- _divide_error - stwbrx r4,EAX,state - sub r3,r5,r3 - stwbrx r3,REG - NEXT - -/* full 64 by 32 signed divide, checks for overflow might be right now */ -1: srawi r6,r4,31 # absolute value of r4:r5 - srawi r0,r3,31 # absolute value of r3 - xor r5,r5,r6 - xor r3,r3,r0 - subfc r5,r6,r5 - xor r4,r4,r6 - sub r3,r3,r0 - subfe r4,r6,r4 - xor r0,r0,r6 # sign of result - cmplw r4,r3 # coarse overflow detection - bnl- _divide_error # (probably not necessary) - bl _div_64_32 - xor r5,r5,r0 # apply sign to result - sub r5,r5,r0 - xor. 
r7,r0,r5 # wrong sign: overflow - xor r4,r4,r6 # apply sign to remainder - blt- _divide_error - stwbrx r5,EAX,state - sub r4,r4,r6 - stwbrx r4,REG - NEXT - -/* aam is indeed a divide */ -aam: NEXTBYTE(r3) - lbz r4,AL(state) - cmpwi r3,0 - beq- _divide_error # zero divide - divwu op2,r4,r3 # AL/imm8 - SET_FLAGS(FLAGS_LOG(B)) # SF/ZF/PF from AL - mullw r3,op2,r3 # (AL/imm8)*imm8 - stb op2,AH(state) - sub result,r4,r3 # AL-imm8*(AL/imm8) - stb result,AL(state) - NEXT # OF/AF/CF undefined - -_divide_error: li r3,code_divide_err - b complex - -/* Instructions dealing with segment registers */ -pushw_sp_sr: li r3,SP - rlwinm opreg,opcode,31,27,29 - addi r5,state,SELECTORS+2 - lhbrx r4,state,r3 - lhzx r0,r5,opreg - addi r4,r4,-2 - sthbrx r4,state,r3 - clrlwi r4,r4,16 - sthbrx r0,r4,ssb - NEXT - -pushl_sp_sr: li r3,SP - rlwinm opreg,opcode,31,27,29 - addi r5,state,SELECTORS+2 - lhbrx r4,state,r3 - lhzx r0,r5,opreg - addi r4,r4,-4 - sthbrx r4,state,r3 - clrlwi r4,r4,16 - stwbrx r0,r4,ssb - NEXT - -movl_sr_mem: cmpwi opreg,20 - addi opreg,opreg,SELECTORS+2 - cmpw cr1,base,state # Only registers are sensitive - bgt- ud # to word/longword difference - lhzx r0,REG - bne cr1,1f - stwbrx r0,MEM # Actually a register - NEXT - -movw_sr_mem: cmpwi opreg,20 # SREG 0 to 5 only - addi opreg,opreg,SELECTORS+2 - bgt- ud - lhzx r0,REG -1: sthbrx r0,MEM - NEXT - -/* Now the instructions that modify the segment registers, note that -move/pop to ss disable interrupts and traps for one instruction ! */ -popl_sp_sr: li r6,4 - b 1f -popw_sp_sr: li r6,2 -1: li r7,SP - rlwinm opreg,opcode,31,27,29 - lhbrx offset,state,r7 - addi opreg,opreg,SELBASES - lhbrx r4,ssb,offset # new selector - add offset,offset,r6 - bl _segment_load - sthbrx offset,state,r7 # update sp - cmpwi opreg,SELBASES+8 # is ss ? (opreg already includes SELBASES) 
- stwux r3,REG - stw r4,SELECTORS-SELBASES(opreg) - lwz esb,esbase(state) - bne+ nop - lwz ssb,ssbase(state) # pop ss - crmove RF,TF # prevent traps - NEXT - -movw_mem_sr: cmpwi opreg,20 - addi r7,state,SELBASES - bgt- ud - cmpwi opreg,4 # CS illegal - beq- ud - lhbrx r4,MEM - bl _segment_load - stwux r3,r7,opreg - cmpwi opreg,8 - stw r4,SELECTORS-SELBASES(r7) - lwz esb,esbase(state) - bne+ nop - lwz ssb,ssbase(state) - crmove RF,TF # prevent traps - NEXT - - .equ movl_mem_sr, movw_mem_sr - -/* The encoding of les/lss/lds/lfs/lgs is strange, opcode is c4/b2/c5/b4/b5 -for es/ss/ds/fs/gs which are sreg 0/2/3/4/5. And obviously there is -no lcs instruction, it's called a far jump. */ - -ldlptrl: lwzux r7,MEM - li r4,4 - bl 1f - stwx r7,REG - NEXT -ldlptrw: lhzux r7,MEM - li r4,2 - bl 1f - sthx r7,REG - NEXT - -/* Common part: r4 is the size of the offset part of the pointer, the -selector is read right after it; the return address is kept in r0 across -the call to _segment_load */ -1: cmpw base,state - lis r3,0xc011 # es/ss/ds/fs/gs - rlwinm r5,opcode,2,0x0c # 00/08/04/00/04 - mflr r0 - addi r3,r3,0x4800 # r3=0xc0114800 - rlwimi r5,opcode,0,0x10 # 00/18/04/10/14 - lhbrx r4,r4,offset - rlwnm opcode,r3,r5,0x1c # 00/08/0c/10/14 = sreg*4 ! - beq- ud # Only mem operands allowed ! - bl _segment_load - addi r5,opcode,SELBASES - stwux r3,r5,state - mtlr r0 - stw r4,SELECTORS-SELBASES(r5) - lwz esb,esbase(state) # keep shadow state in sync - lwz ssb,ssbase(state) - blr - -/* Instructions that may modify the current code segment: the next optimization - * might be to avoid calling C code when the code segment does not change. But - * it's probably not worth the effort. 
- */ -/* Far calls, jumps and returns */ -lcall_w: NEXTWORD(r4) - NEXTWORD(r5) - li r3,code_lcallw - b complex - -lcall_l: NEXTDWORD(r4) - NEXTWORD(r5) - li r3,code_lcalll - b complex - -lcallw: lhbrx r4,MEM - addi offset,offset,2 - lhbrx r5,MEM - li r3,code_lcallw - b complex - -lcalll: lwbrx r4,MEM - addi offset,offset,4 - lhbrx r5,MEM - li r3,code_lcalll - b complex - -ljmp_w: NEXTWORD(r4) - NEXTWORD(r5) - li r3,code_ljmpw - b complex - -ljmp_l: NEXTDWORD(r4) - NEXTWORD(r5) - li r3,code_ljmpl - b complex - -ljmpw: lhbrx r4,MEM - addi offset,offset,2 - lhbrx r5,MEM - li r3,code_ljmpw - b complex - -ljmpl: lwbrx r4,MEM - addi offset,offset,4 - lhbrx r5,MEM - li r3,code_ljmpl - b complex - -lretw_imm: NEXTWORD(r4) - b 1f -lretw: li r4,0 -1: li r3,code_lretw - b complex - -lretl_imm: NEXTWORD(r4) - b 1f -lretl: li r4,0 -1: li r3,code_lretl - b complex - -/* Interrupts */ -int: li r3,code_softint # handled by C code - NEXTBYTE(r4) - b complex - -int3: li r3,code_int3 # handled by C code - b complex - -into: EVAL_OF - bf+ OF,nop - li r3,code_into - b complex # handled by C code - -iretw: li r3,code_iretw # handled by C code - b complex - -iretl: li r3,code_iretl - b complex - -/* Miscellaneous flag control instructions */ -clc: oris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16 - xoris flags,flags,(CF_IN_CR|CF_STATE_MASK|ABOVE_IN_CR)>>16 - NEXT - -cmc: oris flags,flags,(CF_IN_CR|ABOVE_IN_CR)>>16 - xoris flags,flags,(CF_IN_CR|CF_COMPLEMENT|ABOVE_IN_CR)>>16 - NEXT - -stc: oris flags,flags,\ - (CF_IN_CR|CF_LOCATION|CF_COMPLEMENT|ABOVE_IN_CR)>>16 - xoris flags,flags,(CF_IN_CR|CF_LOCATION|ABOVE_IN_CR)>>16 - NEXT - -cld: crclr DF - NEXT - -std: crset DF - NEXT - -cli: crclr IF - NEXT - -sti: crset IF - NEXT - -lahf: bl _eval_flags - stb r3,AH(state) - NEXT - -sahf: andis. 
r3,flags,OF_EXPLICIT>>16 - lbz r0,AH(state) - beql+ _eval_of # save OF just in case - rlwinm op1,r0,31,0x08 # AF - rlwinm flags,flags,0,OF_STATE_MASK - extsb result,r0 # SF/PF - ZF862ZF(r0) - oris flags,flags,(ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16 - addi op2,op1,0 # AF - ori result,result,0x00fb # set all except PF - mtcrf 0x02,r0 # SF/ZF - rlwimi flags,r0,27,CF_VALUE # CF - xori result,result,0x00ff # 00 if PF set, 04 if clear - NEXT - /* pushf with a 16-bit stack pointer: _eval_flags leaves the flags image in r3; SP is read from state, decremented by 2 (pushfw) or 4 (pushfl), written back, truncated to 16 bits and used as the index from ssb for the byte-reversed store. */ -pushfw_sp: bl _eval_flags - li r4,SP - lhbrx r5,r4,state - addi r5,r5,-2 - sthbrx r5,r4,state - clrlwi r5,r5,16 - sthbrx r3,ssb,r5 - NEXT - -pushfl_sp: bl _eval_flags - li r4,SP - lhbrx r5,r4,state - addi r5,r5,-4 - sthbrx r5,r4,state - clrlwi r5,r5,16 - stwbrx r3,ssb,r5 - NEXT - /* popf with a 16-bit stack pointer: the image is read at ssb+SP into r3, a copy is kept in the eflags slot of state and SP is advanced. Both sizes share the tail at 1:, which rebuilds the lazy flag state from r3 (op1/op2 for AF, result for PF and SF, flags for CF and OF, the condition register for the bits named on the mtcrf line). */ -popfl_sp: li r4,SP - lhbrx r5,r4,state - lwbrx r3,ssb,r5 - addi r5,r5,4 - stw r3,eflags(state) - sthbrx r5,r4,state - b 1f - -popfw_sp: li r4,SP - lhbrx r5,r4,state - lhbrx r3,ssb,r5 - addi r5,r5,2 - sth r3,eflags+2(state) - sthbrx r5,r4,state -1: rlwinm op1,r3,31,0x08 # AF - xori result,r3,4 # PF - ZF862ZF(r3) # cr6 - lis flags,(OF_EXPLICIT|ZF_PROTECT|ZF_IN_CR|SF_IN_CR)>>16 - addi op2,op1,0 # AF - rlwinm result,result,0,0x04 # PF - rlwimi flags,r3,27,CF_VALUE # CF - mtcrf 0x6,r3 # IF,DF,TF,SF,ZF - rlwimi result,r3,24,0,0 # SF - rlwimi flags,r3,15,OF_VALUE # OF - NEXT - -/* SETcc is slightly faster for setz/setnz: they own the two store tails, 0: stores opreg (checked to be zero, otherwise ud) and 1: stores the register named one. */ -setz: EVAL_ZF - bt ZF,1f -0: cmpwi opreg,0 - bne- ud - stbx opreg,MEM - NEXT - -setnz: EVAL_ZF - bt ZF,0b -1: cmpwi opreg,0 - bne- ud - stbx one,MEM - NEXT - /* SETCC(cond, eval, flag) emits set<cond> and setn<cond>: each evaluates the flag with EVAL_<eval> and then branches back to the store-one tail (1b) or the store-zero tail (0b) of setz/setnz. */ -#define SETCC(cond, eval, flag) \ -set##cond: EVAL_##eval; bt flag,1b; b 0b; \ -setn##cond: EVAL_##eval; bt flag,0b; b 1b - - SETCC(c, CF, CF) - SETCC(a, ABOVE, ABOVE) - SETCC(s, SF, SF) - SETCC(g, SIGNED, SGT) - SETCC(l, SIGNED, SLT) - SETCC(o, OF, OF) - SETCC(p, PF, PF) - -/* No wait for a 486SX */ - .equ wait, nop - -/* ARPL is not recognized in real mode */ - .equ arpl, ud - -/* clts and in general control and debug registers are not implemented */ - .equ clts, unimpl - -aaa: lhbrx
r0,AX,state - bl _eval_af - rlwinm r3,r3,0,0x10 - SET_FLAGS(FLAGS_ADD(W)) - rlwimi r3,r0,0,0x0f - li r4,0x106 - addi r3,r3,-10 - srwi r3,r3,16 # carry ? 0 : 0xffff - andc op1,r4,r3 # carry ? 0x106 : 0 - add result,r0,op1 - rlwinm result,result,0,28,23 # clear high half of AL - li op2,10 # sets AF indirectly - sthbrx result,AX,state # write back adjusted AX (r3 is only the carry mask); OF/SF/ZF/PF undefined ! - rlwimi result,op1,8,0x10000 # insert CF - NEXT - /* aas mirrors aaa with a subtraction: AX loses 0x106 when AF is set or the low nibble of AL exceeds 9, then the high nibble of AL is cleared. The value written back to AX is result; r3 only holds the 0/0xffff carry mask at that point. */ -aas: lhbrx r0,AX,state - bl _eval_af - rlwinm r3,r3,0,0x10 - SET_FLAGS(FLAGS_ADD(W)) - rlwimi r3,r0,0,0x0f # AF:AL&0x0f - li r4,0x106 - addi r3,r3,-10 - srwi r3,r3,16 # carry ? 0 : 0xffff - andc op1,r4,r3 # carry ? 0x106 : 0 - sub result,r0,op1 - rlwinm result,result,0,28,23 # clear high half of AL - li op2,10 # sets AF indirectly - sthbrx result,AX,state # write back adjusted AX; OF/SF/ZF/PF undefined ! - rlwimi result,op1,8,0x10000 # insert CF - NEXT - /* NOTE(review): in daa and das the addi -10 / rlwinm 3,0x06 pair keeps the two top (sign) bits of r7-10, which are set when the nibble is below 10 and AF is clear; that looks inverted with respect to the 6-if-AF-or-above-9 comment. das also tests r4-160 where daa tests 159-r0. Both are left unchanged here; confirm against the Intel pseudocode before relying on either. */ -daa: lbz r0,AL(state) - bl _eval_af - rlwinm r7,r3,0,0x10 - bl _eval_cf # r3=CF<<8 - rlwimi r7,r0,0,0x0f - SET_FLAGS(FLAGS_ADD(B)) - addi r4,r7,-10 - rlwinm r4,r4,3,0x06 # 6 if AF or >9, 0 otherwise - srwi op1,r7,1 # 0..4, no AF, 5..f AF set - add r0,r0,r4 # conditional add - li op2,11 # sets AF depending on op1 - or r0,r0,r3 - subfic r3,r0,159 - rlwinm r3,r3,7,0x60 # mask value to add - add result,r0,r3 # final result for SF/ZF/PF - stb result,AL(state) - rlwimi result,r3,2,0x100 # set CF if added - NEXT - -das: lbz r0,AL(state) - bl _eval_af - rlwinm r7,r3,0,0x10 - bl _eval_cf - rlwimi r7,r0,0,0x0f - SET_FLAGS(FLAGS_ADD(B)) - addi r4,r7,-10 - rlwinm r4,r4,3,0x06 - srwi op1,r7,1 # 0..4, no AF, 5..f AF set - sub r0,r0,r4 # conditional subtract - li op2,11 # sets AF depending on op1 - or r4,r0,r3 # insert CF - addi r3,r4,-160 - rlwinm r3,r3,7,0x60 # mask value to subtract - sub result,r4,r3 # final result for SF/ZF/PF - stb result,AL(state) - rlwimi result,r3,2,0x100 # set CF - NEXT - -/* 486 specific instructions */ - -/* For cmpxchg, only the zero flag is important */ - -cmpxchgb: lbz op1,AL(state) - SET_FLAGS(FLAGS_SUB(B)|ZF_IN_CR) - lbzx op2,MEM - 
cmpw cr6,op1,op2 - sub result,op1,op2 - bne cr6,1f - lbzx r3,REG # success: MEM <- REG - stbx r3,MEM - NEXT -1: stb op2,AL(state) - NEXT - /* cmpxchg: the accumulator (op1) is compared with the memory operand (op2). When they are equal REG is copied to MEM, otherwise the accumulator is loaded with the memory value. result, op1 and op2 are left for the lazy flag evaluation. */ -cmpxchgw: lhbrx op1,AX,state - SET_FLAGS(FLAGS_SUB(W)|ZF_IN_CR) - lhbrx op2,MEM - cmpw cr6,op1,op2 - sub result,op1,op2 - bne cr6,1f - lhzx r3,REG # success: MEM <- REG - sthx r3,MEM - NEXT -1: sthbrx op2,AX,state - NEXT - -cmpxchgl: lwbrx op1,EAX,state - SET_FLAGS(FLAGS_SUB(L)|ZF_IN_CR|SIGNED_IN_CR) - lwbrx op2,MEM - cmpw cr6,op1,op2 - sub result,op1,op2 - bne cr6,1f - lwzx r3,REG # success: MEM <- REG - stwx r3,MEM - NEXT -1: stwbrx op2,EAX,state - NEXT - /* xadd: REG receives the previous destination value and the destination receives the sum. The destination is written last so that, when REG and MEM name the same register, the sum is what remains (source updated first, destination second, as in the Intel operation description). */ -xaddb: lbzx op2,MEM - SET_FLAGS(FLAGS_ADD(B)) - lbzx op1,REG - add result,op1,op2 - stbx op2,REG # REG <- old destination - stbx result,MEM # destination last - NEXT - -xaddw: lhbrx op2,MEM - SET_FLAGS(FLAGS_ADD(W)) - lhbrx op1,REG - add result,op1,op2 - sthbrx op2,REG # REG <- old destination - sthbrx result,MEM # destination last - NEXT - -xaddl: lwbrx op2,MEM - SET_FLAGS(FLAGS_ADD(L)) - lwbrx op1,REG - add result,op1,op2 - stwbrx op2,REG # REG <- old destination - stwbrx result,MEM # destination last - NEXT - -/* All FPU instructions skipped. This is a 486 SX ! 
*/ -esc: li r3,code_dna # DNA interrupt - b complex - - .equ hlt, unimpl # Cannot stop - - .equ invd, unimpl - -/* Undefined in real address mode */ - .equ lar, ud - - .equ lgdt, unimpl - .equ lidt, unimpl - .equ lldt, ud - .equ lmsw, unimpl - -/* protected mode only */ - .equ lsl, ud - .equ ltr, ud - - .equ movl_cr_reg, unimpl - .equ movl_reg_cr, unimpl - .equ movl_dr_reg, unimpl - .equ movl_reg_dr, unimpl - - .equ sgdt, unimpl - - .equ sidt, unimpl - .equ sldt, ud - .equ smsw, unimpl - - .equ str, ud - /* ud and unimpl both hand code_ud to complex; they differ only in r4, which is 0 for ud and 1 for unimpl. */ -ud: li r3,code_ud - li r4,0 - b complex - -unimpl: li r3,code_ud - li r4,1 - b complex - - .equ verr, ud - .equ verw, ud - .equ wbinvd, unimpl - -em86_end: - .size em86_enter,em86_end-em86_enter -#ifdef __BOOT__ - .data -#define ENTRY(x,t) .long x+t-_jtables -#else - .section .rodata -#define ENTRY(x,t) .long x+t -#endif - /* ENTRY(x,t) emits one table word: the handler address x plus a type tag t between 0 and 3, made relative to _jtables in the __BOOT__ build. The four wrappers below fix the tag. */ -#define BOP(x) ENTRY(x,2) /* Byte operation with mod/rm byte */ -#define WLOP(x) ENTRY(x,3) /* 16 or 32 bit operation with mod/rm byte */ -#define EXTOP(x) ENTRY(x,0) /* Opcode with extension in mod/rm byte */ -#define OP(x) ENTRY(x,1) /* Direct one byte opcode/prefix */ - -/* A few macros for the main table */ -#define gen6(op, wl, axeax) \ - BOP(op##b##_reg_mem); WLOP(op##wl##_reg_mem); \ - BOP(op##b##_mem_reg); WLOP(op##wl##_mem_reg); \ - OP(op##b##_imm_al); OP(op##wl##_imm_##axeax) - -#define rep7(l,t) \ - ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); ENTRY(l,t); \ - ENTRY(l,t); ENTRY(l,t); ENTRY(l,t) - -#define rep8(l) l ; l; l; l; l; l; l; l; - /* allcond emits 16 entries in the order o, no, c, nc, z, nz, na, a, s, ns, p, np, l, nl, ng, g, i.e. x86 condition codes 0 to 15. */ -#define allcond(pfx, sfx, t) \ - ENTRY(pfx##o##sfx, t); ENTRY(pfx##no##sfx, t); \ - ENTRY(pfx##c##sfx, t); ENTRY(pfx##nc##sfx, t); \ - ENTRY(pfx##z##sfx, t); ENTRY(pfx##nz##sfx, t); \ - ENTRY(pfx##na##sfx, t); ENTRY(pfx##a##sfx, t); \ - ENTRY(pfx##s##sfx, t); ENTRY(pfx##ns##sfx, t); \ - ENTRY(pfx##p##sfx, t); ENTRY(pfx##np##sfx, t); \ - ENTRY(pfx##l##sfx, t); ENTRY(pfx##nl##sfx, t); \ - ENTRY(pfx##ng##sfx, t); ENTRY(pfx##g##sfx, t) - -/* single/double register sign extensions and other oddities */ -#define 
h2sextw cbw /* Half to Single sign extension */ -#define s2dextw cwd /* Single to Double sign extension */ -#define h2sextl cwde -#define s2dextl cdq -#define j_a16_cxz_w jcxz_w -#define j_a32_cxz_w jecxz_w -#define j_a16_cxz_l jcxz_l -#define j_a32_cxz_l jecxz_l -#define loopa16_w loopw_w -#define loopa16_l loopw_l -#define loopa32_w loopl_w -#define loopa32_l loopl_l -#define loopnza16_w loopnzw_w -#define loopnza16_l loopnzw_l -#define loopnza32_w loopnzl_w -#define loopnza32_l loopnzl_l -#define loopza16_w loopzw_w -#define loopza16_l loopzw_l -#define loopza32_w loopzl_w -#define loopza32_l loopzl_l -/* No FP support */ - -/* Addressing mode table: 32 words for the 16-bit modes followed by 32 words for the 32-bit modes, 8 words per mod value, with the register-mode rows left at zero. Every entry whose base is %bp or %ebp has the top bit set (presumably the marker for the SS default segment; confirm against the decoder). */ - .align 5 -# (%bx,%si), (%bx,%di), (%bp,%si), (%bp,%di) -adtable: .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 -# (%si), (%di), o16, (%bx) - .long 0x00004600, 0x00004700, 0x00002000, 0x00004300 -# o8(%bx,%si), o8(%bx,%di), o8(%bp,%si), o8(%bp,%di) - .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 -# o8(%si), o8(%di), o8(%bp), o8(%bx) - .long 0x00004600, 0x00004700, 0x80004500, 0x00004300 -# o16(%bx,%si), o16(%bx,%di), o16(%bp,%si), o16(%bp,%di) - .long 0x00004360, 0x00004370, 0x80004560, 0x80004570 -# o16(%si), o16(%di), o16(%bp), o16(%bx) - .long 0x00004600, 0x00004700, 0x80004500, 0x00004300 -# register addressing modes do not use the table - .long 0, 0, 0, 0, 0, 0, 0, 0 -#now 32 bit modes -# (%eax), (%ecx), (%edx), (%ebx) - .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 -# sib, o32, (%esi), (%edi) - .long 0x00003090, 0x00002090, 0x00004690, 0x00004790 -# o8(%eax), o8(%ecx), o8(%edx), o8(%ebx) - .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 -# sib, o8(%ebp), o8(%esi), o8(%edi) - .long 0x00003090, 0x80004590, 0x00004690, 0x00004790 -# o32(%eax), o32(%ecx), o32(%edx), o32(%ebx) - .long 0x00004090, 0x00004190, 0x00004290, 0x00004390 -# sib, o32(%ebp), o32(%esi), o32(%edi) - .long 0x00003090, 0x80004590, 0x00004690, 0x00004790 -# register addressing modes do not use the table - .long 0, 0, 
0, 0, 0, 0, 0, 0 - /* jtable expands to one 512-word dispatch table labelled jtab_<name>: 256 entries indexed by the one-byte opcode, then 256 entries for the two-byte opcodes reached through the _twobytes entry at index 0x0f. wl selects the operand size suffix, awl the address size suffix, spesp the stack pointer name and axeax the accumulator name pasted into the handler names. */ -#define jtable(wl, awl, spesp, axeax, name ) \ - .align 5; \ -jtab_##name: gen6(add, wl, axeax); \ - OP(push##wl##_##spesp##_sr); \ - OP(pop##wl##_##spesp##_sr); \ - gen6(or, wl, axeax); \ - OP(push##wl##_##spesp##_sr); \ - OP(_twobytes); \ - gen6(adc, wl, axeax); \ - OP(push##wl##_##spesp##_sr); \ - OP(pop##wl##_##spesp##_sr); \ - gen6(sbb, wl, axeax); \ - OP(push##wl##_##spesp##_sr); \ - OP(pop##wl##_##spesp##_sr); \ - gen6(and, wl, axeax); OP(_es); OP(daa); \ - gen6(sub, wl, axeax); OP(_cs); OP(das); \ - gen6(xor, wl, axeax); OP(_ss); OP(aaa); \ - gen6(cmp, wl, axeax); OP(_ds); OP(aas); \ - rep8(OP(inc##wl##_reg)); \ - rep8(OP(dec##wl##_reg)); \ - rep8(OP(push##wl##_##spesp##_reg)); \ - rep8(OP(pop##wl##_##spesp##_reg)); \ - OP(pusha##wl##_##spesp); OP(popa##wl##_##spesp); \ - WLOP(bound##wl); WLOP(arpl); \ - OP(_fs); OP(_gs); OP(_opsize); OP(_adsize); \ - OP(push##wl##_##spesp##_imm); WLOP(imul##wl##_imm); \ - OP(push##wl##_##spesp##_imm8); WLOP(imul##wl##_imm8); \ - OP(insb_##awl); OP(ins##wl##_##awl); \ - OP(outsb_##awl); OP(outs##wl##_##awl); \ - allcond(sj,_##wl,1); \ - EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm); \ - EXTOP(grp1b_imm); EXTOP(grp1##wl##_imm8); \ - BOP(testb_reg_mem); WLOP(test##wl##_reg_mem); \ - BOP(xchgb_reg_mem); WLOP(xchg##wl##_reg_mem); \ - BOP(movb_reg_mem); WLOP(mov##wl##_reg_mem); \ - BOP(movb_mem_reg); WLOP(mov##wl##_mem_reg); \ - WLOP(mov##wl##_sr_mem); WLOP(lea##wl); \ - WLOP(mov##wl##_mem_sr); WLOP(pop##wl##_##spesp##_##awl); \ - OP(nop); rep7(xchg##wl##_##axeax##_reg,1); \ - OP(h2sext##wl); OP(s2dext##wl); \ - OP(lcall_##wl); OP(wait); \ - OP(pushf##wl##_##spesp); OP(popf##wl##_##spesp); \ - OP(sahf); OP(lahf); \ - OP(movb_##awl##_al); OP(mov##wl##_##awl##_##axeax); \ - OP(movb_al_##awl); OP(mov##wl##_##axeax##_##awl); \ - OP(movsb_##awl); OP(movs##wl##_##awl); \ - OP(cmpsb_##awl); OP(cmps##wl##_##awl); \ - OP(testb_imm_al); OP(test##wl##_imm_##axeax); \ - OP(stosb_##awl); OP(stos##wl##_##awl); \ - 
OP(lodsb_##awl); OP(lods##wl##_##awl); \ - OP(scasb_##awl); OP(scas##wl##_##awl); \ - rep8(OP(movb_imm_reg)); \ - rep8(OP(mov##wl##_imm_reg)); \ - EXTOP(shiftb_imm); EXTOP(shift##wl##_imm); \ - OP(ret##wl##_##spesp##_imm); OP(ret##wl##_##spesp); \ - WLOP(ldlptr##wl); WLOP(ldlptr##wl); \ - BOP(movb_imm_mem); WLOP(mov##wl##_imm_mem); \ - OP(enter##wl##_##spesp); OP(leave##wl##_##spesp); \ - OP(lret##wl##_imm); OP(lret##wl); \ - OP(int3); OP(int); OP(into); OP(iret##wl); \ - EXTOP(shiftb_1); EXTOP(shift##wl##_1); \ - EXTOP(shiftb_cl); EXTOP(shift##wl##_cl); \ - OP(aam); OP(aad); OP(ud); OP(xlatb_##awl); \ - rep8(OP(esc)); \ - OP(loopnz##awl##_##wl); OP(loopz##awl##_##wl); \ - OP(loop##awl##_##wl); OP(j_##awl##_cxz_##wl); \ - OP(inb_port_al); OP(in##wl##_port_##axeax); \ - OP(outb_al_port); OP(out##wl##_##axeax##_port); \ - OP(call##wl##_##spesp); OP(jmp_##wl); \ - OP(ljmp_##wl); OP(sjmp_##wl); \ - OP(inb_dx_al); OP(in##wl##_dx_##axeax); \ - OP(outb_al_dx); OP(out##wl##_##axeax##_dx); \ - OP(_lock); OP(ud); OP(_repnz); OP(_repz); \ - OP(hlt); OP(cmc); \ - EXTOP(grp3b); EXTOP(grp3##wl); \ - OP(clc); OP(stc); OP(cli); OP(sti); \ - OP(cld); OP(std); \ - EXTOP(grp4b); EXTOP(grp5##wl##_##spesp); \ - /* Here we start the table for two-byte instructions: entry 256 is the second opcode byte 0x00 */ \ - OP(ud); OP(ud); WLOP(lar); WLOP(lsl); \ - OP(ud); OP(ud); OP(clts); OP(ud); \ - OP(invd); OP(wbinvd); OP(ud); OP(ud); \ - OP(ud); OP(ud); OP(ud); OP(ud); \ - rep8(OP(ud)); \ - rep8(OP(ud)); \ - OP(movl_cr_reg); OP(movl_reg_cr); \ - OP(movl_dr_reg); OP(movl_reg_dr); \ - OP(ud); OP(ud); OP(ud); OP(ud); \ - rep8(OP(ud)); \ - /* .long wrmsr, rdtsc, rdmsr, rdpmc; */\ - rep8(OP(ud)); \ - rep8(OP(ud)); \ - /* allcond(cmov, wl); */ \ - rep8(OP(ud)); rep8(OP(ud)); \ - rep8(OP(ud)); rep8(OP(ud)); \ - /* MMX Start */ \ - rep8(OP(ud)); rep8(OP(ud)); \ - rep8(OP(ud)); rep8(OP(ud)); \ - /* MMX End */ \ - allcond(j,_##wl, 1); \ - allcond(set,,2); \ - OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \ - OP(ud) /* cpuid */; 
WLOP(bt##wl##_reg_mem); \ - WLOP(shld##wl##_imm); WLOP(shld##wl##_cl); \ - OP(ud); OP(ud); \ - OP(push##wl##_##spesp##_sr); OP(pop##wl##_##spesp##_sr); \ - OP(ud) /* rsm */; WLOP(bts##wl##_reg_mem); \ - WLOP(shrd##wl##_imm); WLOP(shrd##wl##_cl); \ - OP(ud); WLOP(imul##wl##_mem_reg); \ - BOP(cmpxchgb); WLOP(cmpxchg##wl); \ - WLOP(ldlptr##wl); WLOP(btr##wl##_reg_mem); \ - WLOP(ldlptr##wl); WLOP(ldlptr##wl); \ - WLOP(movzb##wl); WLOP(movzw##wl); \ - OP(ud); OP(ud); \ - EXTOP(grp8##wl); WLOP(btc##wl##_reg_mem); \ - WLOP(bsf##wl); WLOP(bsr##wl); \ - WLOP(movsb##wl); WLOP(movsw##wl); \ - BOP(xaddb); WLOP(xadd##wl); \ - OP(ud); OP(ud); \ - OP(ud); OP(ud); OP(ud); OP(ud); \ - rep8(OP(bswap)); \ - /* MMX Start */ \ - rep8(OP(ud)); rep8(OP(ud)); \ - rep8(OP(ud)); rep8(OP(ud)); \ - rep8(OP(ud)); rep8(OP(ud)); \ - /* MMX End */ - .align 5 /* 8kb of tables (4 tables of 512 words), 32 byte aligned */ -_jtables: jtable(w, a16, sp, ax, www) /* data16, addr16 */ - jtable(l, a16, sp, eax, lww) /* data32, addr16 */ - jtable(w, a32, sp, ax, wlw) /* data16, addr32 */ - jtable(l, a32, sp, eax, llw) /* data32, addr32 */ -/* The other possible combinations are only required by protected mode -code using a big stack segment */ -/* Here are the auxiliary tables for opcode extensions, note that -all entries get 2 or 3 added, except the OP(ud) placeholders which get 1. 
*/ -#define grp1table(bwl,t,s8) \ -grp1##bwl##_imm##s8:; \ - ENTRY(add##bwl##_imm##s8,t); ENTRY(or##bwl##_imm##s8,t); \ - ENTRY(adc##bwl##_imm##s8,t); ENTRY(sbb##bwl##_imm##s8,t); \ - ENTRY(and##bwl##_imm##s8,t); ENTRY(sub##bwl##_imm##s8,t); \ - ENTRY(xor##bwl##_imm##s8,t); ENTRY(cmp##bwl##_imm##s8,t) - - grp1table(b,2,) - grp1table(w,3,) - grp1table(w,3,8) - grp1table(l,3,) - grp1table(l,3,8) - /* Shift group: eight entries indexed by the opcode extension; index 6 has no handler and dispatches to ud. One table per operand size and per count source (1, cl, imm). */ -#define shifttable(bwl,t,c) \ -shift##bwl##_##c:; \ - ENTRY(rol##bwl##_##c,t); ENTRY(ror##bwl##_##c,t); \ - ENTRY(rcl##bwl##_##c,t); ENTRY(rcr##bwl##_##c,t); \ - ENTRY(shl##bwl##_##c,t); ENTRY(shr##bwl##_##c,t); \ - OP(ud); ENTRY(sar##bwl##_##c,t) - - shifttable(b,2,1) - shifttable(w,3,1) - shifttable(l,3,1) - - shifttable(b,2,cl) - shifttable(w,3,cl) - shifttable(l,3,cl) - - shifttable(b,2,imm) - shifttable(w,3,imm) - shifttable(l,3,imm) - -#define grp3table(bwl,t) \ -grp3##bwl: ENTRY(test##bwl##_imm,t); OP(ud); \ - ENTRY(not##bwl,t); ENTRY(neg##bwl,t); \ - ENTRY(mul##bwl,t); ENTRY(imul##bwl,t); \ - ENTRY(div##bwl,t); ENTRY(idiv##bwl,t) - - grp3table(b,2) - grp3table(w,3) - grp3table(l,3) - -grp4b: BOP(incb); BOP(decb); \ - OP(ud); OP(ud); \ - OP(ud); OP(ud); \ - OP(ud); OP(ud) - /* grp5 (opcode 0xff): inc, dec, near call, far call, near jmp, far jmp, push, undefined. The far call entry pastes only lcall and wl: a paste operator placed after wl would try to glue it to the closing parenthesis, which is not a valid token. */ -#define grp5table(wl,spesp) \ -grp5##wl##_##spesp: \ - WLOP(inc##wl); WLOP(dec##wl); \ - WLOP(call##wl##_##spesp##_mem); WLOP(lcall##wl); \ - WLOP(jmp##wl); WLOP(ljmp##wl); \ - WLOP(push##wl##_##spesp); OP(ud) - - grp5table(w,sp) - grp5table(l,sp) - -#define grp8table(wl) \ -grp8##wl: OP(ud); OP(ud); OP(ud); OP(ud); \ - WLOP(bt##wl##_imm); WLOP(bts##wl##_imm); \ - WLOP(btr##wl##_imm); WLOP(btc##wl##_imm) - - grp8table(w) - grp8table(l) -#ifdef __BOOT__ -_endjtables: .long 0 /* Points to _jtables after relocation */ -#endif |