// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/x86/x86.cpp,v 1.5 2002/01/04 08:37:55 xhshi Exp $
//
// Todo:
//			(2) fcom instruction
//

#include "platform.h"
#include <iostream.h>
#include "x86.h"

//
// operand structures for x86 registers
//
// One shared operand object per general-purpose register.
// ax_opnd and al_opnd are also built from eax_reg; they are distinct objects,
// and alu() in this file tells the 16-bit and 8-bit accumulator forms apart
// by comparing its operand's address against &ax_opnd and &al_opnd.
R_Opnd eax_opnd(eax_reg);
R_Opnd ax_opnd(eax_reg);
R_Opnd al_opnd(eax_reg);
R_Opnd ecx_opnd(ecx_reg);
R_Opnd edx_opnd(edx_reg);
R_Opnd ebx_opnd(ebx_reg);
R_Opnd esp_opnd(esp_reg);
R_Opnd ebp_opnd(ebp_reg);
R_Opnd esi_opnd(esi_reg);
R_Opnd edi_opnd(edi_reg);

// Table holding copies of the eight operands above, in the order
// eax, ecx, edx, ebx, esp, ebp, esi, edi.
// NOTE(review): presumably indexed by X86_Reg_No -- confirm against x86.h.
R_Opnd reg_operand_array[] =
    { eax_opnd, ecx_opnd, edx_opnd, ebx_opnd, esp_opnd, ebp_opnd, esi_opnd, edi_opnd };

//
// stack top operand
//
// Memory operands based on esp with displacements 0, 4, 8, 12 and 16,
// i.e. the top five 4-byte slots of the stack.
M_Base_Opnd stack_top_opnd(esp_reg,0);
M_Base_Opnd stack_top_opnd_1(esp_reg,4);
M_Base_Opnd stack_top_opnd_2(esp_reg,8);
M_Base_Opnd stack_top_opnd_3(esp_reg,12);
M_Base_Opnd stack_top_opnd_4(esp_reg,16);

//
// X86 reg string
//
// Eight register names followed by one extra "xxx" entry (the array has
// n_reg+1 slots).
// NOTE(review): the "xxx" slot is presumably the name printed for an
// invalid/unknown register number -- confirm against callers.
char* X86_Reg_Str[n_reg+1] = {"eax","ecx","edx","ebx","esp","ebp","esi","edi","xxx"};

// Returns 1 when bit r of the ALL_X86_CALLER_REGS mask is set, 0 otherwise.
int is_scratch_x86reg(X86_Reg_No r) {
    if ((1 << r) & ALL_X86_CALLER_REGS)
        return 1;
    return 0;
}

// Returns 1 when bit r of the ALL_X86_CALLEE_REGS mask is set, 0 otherwise.
int is_callee_x86reg(X86_Reg_No r) {
    if ((1 << r) & ALL_X86_CALLEE_REGS)
        return 1;
    return 0;
}

////////////////////////////////////////////////////////////////////////////////
//
// prefix
//
////////////////////////////////////////////////////////////////////////////////
// Writes the prefix value p as a single byte at inst.
// Returns the address of the byte following it.
char *prefix(char *inst,const X86_Prefix p) {
    *inst++ = static_cast<unsigned char>(p);
    return inst;
}
////////////////////////////////////////////////////////////////////////////////
//
// cmpxchg or xchg
//
////////////////////////////////////////////////////////////////////////////////
// Emits the two-byte opcode 0x0f 0xb1 (cmpxchg r/m, r) followed by the
// operand bytes produced by rm->emit() with r's register number.
// Returns the address just past the emitted instruction.
char *cmpxchg(char *inst,const RM_Opnd *rm,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x0f);
    *inst++ = static_cast<char>(0xb1);
    return rm->emit(inst, r->reg_no());
}
//zying1
// Sized cmpxchg.
//   opnd_8  : 0x0f 0xb0
//   opnd_16 : 0x66 prefix, then 0x0f 0xb1
//   opnd_32 : 0x0f 0xb1
// For any other size no opcode bytes are written; only the operand bytes
// from rm->emit() are produced.
// Returns the address just past the emitted bytes.
char *cmpxchg(char *inst,const RM_Opnd *rm,const R_Opnd *r,X86_Opnd_Size sz) {
    if (sz == opnd_16) {
        // operand-size override turns the 32-bit form into the 16-bit one
        *inst++ = static_cast<char>(0x66);
    }
    if (sz == opnd_8 || sz == opnd_16 || sz == opnd_32) {
        *inst++ = static_cast<char>(0x0f);
        *inst++ = static_cast<char>(sz == opnd_8 ? 0xb0 : 0xb1);
    }
    return rm->emit(inst, r->reg_no());
}
// Emits xchg rm, r: opcode 0x86 for opnd_8, 0x87 for every other size,
// followed by the operand bytes from rm->emit().
// No operand-size prefix is written here, whatever sz is.
// Returns the address just past the emitted instruction.
char *xchg(char *inst,const RM_Opnd *rm,const R_Opnd *r,X86_Opnd_Size sz) {
    const int opcode = (sz == opnd_8) ? 0x86 : 0x87;
    *inst++ = static_cast<char>(opcode);
    return rm->emit(inst, r->reg_no());
}

////////////////////////////////////////////////////////////////////////////////
//
// inc(rement), dec(rement), not, neg(ate) instructions
//
////////////////////////////////////////////////////////////////////////////////
// inc on a memory operand: opcode 0xff with opcode-extension field 0.
// Returns the address just past the emitted instruction.
char *inc(char *inst,const M_Opnd * m) {
    *inst++ = static_cast<char>(0xff);
    return m->emit(inst, 0);
}
// inc on a register: the single byte 0x40 + register number.
// Returns the address just past it.
char *inc(char *inst,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x40 + r->reg_no());
    return inst;
}
// dec on a memory operand: opcode 0xff with opcode-extension field 1.
// Returns the address just past the emitted instruction.
char *dec(char *inst,const M_Opnd *m) {
    *inst++ = static_cast<char>(0xff);
    return m->emit(inst, 1);
}
// dec on a register: the single byte 0x48 + register number.
// Returns the address just past it.
char *dec(char *inst,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x48 + r->reg_no());
    return inst;
}
// not rm: opcode 0xf7 with opcode-extension field 2.
// Returns the address just past the emitted instruction.
char *_not(char *inst,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0xf7);
    return rm->emit(inst, 2);
}
// neg rm: opcode 0xf7 with opcode-extension field 3.
// Returns the address just past the emitted instruction.
char *neg(char *inst,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0xf7);
    return rm->emit(inst, 3);
}
// Emits the one-byte nop (0x90) and returns the address just past it.
char *nop(char *inst) {
    *inst++ = static_cast<char>(0x90);
    return inst;
}
////////////////////////////////////////////////////////////////////////////////
//
// alu instructions: add, or, adc, sbb, and, sub, xor, cmp
//
////////////////////////////////////////////////////////////////////////////////
char *alu(char *inst,X86_ALU_Opcode opc,const RM_Opnd *rm,const Imm_Opnd *imm,bool is_lo_reg) {
//zying1
	if ( !is_lo_reg){
		// cmp ah, imm8
		if (rm->is_eax_reg() && imm->is_imm8() && opc == cmp_opc){
			*inst++ = (unsigned char)0x80;
			unsigned char reg = ((R_Opnd*)rm)->reg_no() + 4;
			*inst++ = (unsigned char)(0xf8|reg);
			return imm->emit8(inst);
		}

		// sub ah, imm8
		if (rm->is_eax_reg() && imm->is_imm8() && opc == sub_opc){
			*inst++ = (unsigned char)0x80;
			unsigned char reg = ((R_Opnd*)rm)->reg_no() + 4;
			*inst++ = (unsigned char)(0xe8|reg);
			return imm->emit8(inst);
		}
	}
//zying1	
	if (rm->is_eax_reg()) {
		//
		// eax destination form
		//
        unsigned opcode = (((unsigned char)opc) << 3) + 5;
        if (((R_Opnd*)rm) == &al_opnd) {
            assert(imm->is_imm8());
            *inst = opcode - 1;
            return imm->emit8(inst+1);
        } 
        if (((R_Opnd*)rm) == &ax_opnd) {
            assert(imm->is_imm16());
	        // 16-bit store to memory are implemented
            // by adding an operand size override
            // prefix to the 32-bit mov instruction
		    *inst++ = (unsigned char)0x66;
		    *inst = opcode;
            return imm->emit16(inst+1);
        }
        *inst = opcode;
        return imm->emit32(inst+1);
	}
	if (imm->is_imm8() && opc!=or_opc && opc!=xor_opc && opc!=and_opc) {
		//
		// 8-bit immediate form
		//
		*inst = (unsigned char)0x83;
		return imm->emit8(rm->emit(inst+1,opc));
	}
	//
	// 32-bit immediate form
	//
	*inst = (unsigned char)0x81;
	return imm->emit32(rm->emit(inst+1,opc));
}
// ALU operation m = m <opc> r: opcode (opc<<3)+1 followed by the operand
// bytes from m->emit() with r's register number.
// Returns the address just past the emitted instruction.
char *alu(char *inst,X86_ALU_Opcode opc,const M_Opnd *m,const R_Opnd *r) {
    const int opcode = (static_cast<unsigned char>(opc) << 3) + 1;
    *inst++ = static_cast<char>(opcode);
    return m->emit(inst, r->reg_no());
}
// ALU operation r = r <opc> rm: opcode (opc<<3)+3 followed by the operand
// bytes from rm->emit() with r's register number.
// Returns the address just past the emitted instruction.
char *alu(char *inst,X86_ALU_Opcode opc,const R_Opnd *r,const RM_Opnd *rm) {
    const int opcode = (static_cast<unsigned char>(opc) << 3) + 3;
    *inst++ = static_cast<char>(opcode);
    return rm->emit(inst, r->reg_no());
}
//zying1
// Sized variant of alu(r, rm).  The size is honoured for cmp only:
//   opnd_8  : opcode (opc<<3)+2
//   opnd_16 : 0x66 operand-size prefix, then opcode (opc<<3)+3
//   opnd_32 : opcode (opc<<3)+3
// For cmp with any other size the opcode byte is left unwritten.
// Every other operation ignores sz and uses the 32-bit opcode (opc<<3)+3.
// Returns the address just past the emitted instruction.
char *alu(char *inst,X86_ALU_Opcode opc,const R_Opnd *r,const RM_Opnd *rm,X86_Opnd_Size sz) {
    const int full_opcode = (static_cast<unsigned char>(opc) << 3) + 3;

    if (opc != cmp_opc) {
        *inst = static_cast<char>(full_opcode);
    } else if (sz == opnd_8) {
        *inst = static_cast<char>(full_opcode - 1);
    } else if (sz == opnd_16) {
        *inst++ = static_cast<char>(0x66);
        *inst = static_cast<char>(full_opcode);
    } else if (sz == opnd_32) {
        *inst = static_cast<char>(full_opcode);
    }

    return rm->emit(inst + 1, r->reg_no());
}
//zying1
////////////////////////////////////////////////////////////////////////////////
//
// test instruction
//
////////////////////////////////////////////////////////////////////////////////
// test rm, imm32.
// The accumulator gets the short form (opcode 0xa9 + imm32); everything else
// uses opcode 0xf7 with opcode-extension field 0, then imm32.
// Returns the address just past the emitted instruction.
char *test(char *inst,const RM_Opnd *rm,const Imm_Opnd *imm) {
    if (!rm->is_eax_reg()) {
        // general form
        *inst++ = static_cast<char>(0xf7);
        char *after_operand = rm->emit(inst, 0);
        return imm->emit32(after_operand);
    }
    // accumulator form
    *inst++ = static_cast<char>(0xa9);
    return imm->emit32(inst);
}
// test rm, r: opcode 0x85 followed by the operand bytes from rm->emit()
// with r's register number.
// Returns the address just past the emitted instruction.
char *test(char *inst,const RM_Opnd *rm,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x85);
    return rm->emit(inst, r->reg_no());
}
////////////////////////////////////////////////////////////////////////////////
//
// shift instructions: shl, shr, sar, shld, shrd
//
////////////////////////////////////////////////////////////////////////////////
// Shift rm by an immediate count; opc goes into the opcode-extension field.
// A count of exactly 1 uses the shorter opcode 0xd1 (no immediate byte);
// any other count uses opcode 0xc1 followed by an 8-bit immediate.
// Returns the address just past the emitted instruction.
char *shift(char *inst,X86_Shift_Opcode opc,
			const RM_Opnd *rm,const Imm_Opnd *imm) {
    if (imm->value != 1) {
        *inst++ = static_cast<char>(0xc1);
        char *after_operand = rm->emit(inst, opc);
        return imm->emit8(after_operand);
    }
    // shift-by-one form
    *inst++ = static_cast<char>(0xd1);
    return rm->emit(inst, opc);
}
// Shift rm by cl: opcode 0xd3 with opc in the opcode-extension field.
// Returns the address just past the emitted instruction.
char *shift(char *inst,X86_Shift_Opcode opc,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0xd3);
    return rm->emit(inst, opc);
}
// Double-precision shift by an immediate count.
// shrd_opc emits 0x0f 0xac; otherwise opc must be shld_opc (asserted) and
// 0x0f 0xa4 is emitted.  The operand bytes and an 8-bit immediate follow.
// Returns the address just past the emitted instruction.
char *shift(char *inst,X86_Shift_Opcode opc,const RM_Opnd *rm,
                        const R_Opnd *r,const Imm_Opnd *imm) {
    const bool is_shrd = (opc == shrd_opc);
    if (!is_shrd) {
        assert(opc==shld_opc) ;
    }
    *inst++ = static_cast<char>(0x0f);
    *inst++ = static_cast<char>(is_shrd ? 0xac : 0xa4);
    char *after_operand = rm->emit(inst, r->reg_no());
    return imm->emit8(after_operand);
}
// Double-precision shift by cl.
// shrd_opc emits 0x0f 0xad; otherwise opc must be shld_opc (asserted) and
// 0x0f 0xa5 is emitted.  The operand bytes follow.
// Returns the address just past the emitted instruction.
char *shift(char *inst,X86_Shift_Opcode opc,const RM_Opnd *rm,
                        const R_Opnd *r) {
    const bool is_shrd = (opc == shrd_opc);
    if (!is_shrd) {
        assert(opc==shld_opc) ;
    }
    *inst++ = static_cast<char>(0x0f);
    *inst++ = static_cast<char>(is_shrd ? 0xad : 0xa5);
    return rm->emit(inst, r->reg_no());
}
////////////////////////////////////////////////////////////////////////////////
//
// multiply instructions: mul, imul
//
////////////////////////////////////////////////////////////////////////////////
// One-operand multiply, EDX:EAX = EAX * rm.
// Opcode 0xf7 with opcode-extension field 4 (unsigned) or 5 (signed).
// Returns the address just past the emitted instruction.
char *mul(char *inst,const RM_Opnd *rm,int is_signed) {
    const int extension = is_signed ? 5 : 4;
    *inst++ = static_cast<char>(0xf7);
    return rm->emit(inst, extension);
}
// Two-operand signed multiply, r *= rm: opcode 0x0f 0xaf followed by the
// operand bytes from rm->emit() with r's register number.
// Returns the address just past the emitted instruction.
char *imul(char *inst,const R_Opnd *r,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0x0f);
    *inst++ = static_cast<char>(0xaf);
    return rm->emit(inst, r->reg_no());
}
// r *= imm, expressed through the three-operand form with r used as both
// destination and source.
char *imul(char *inst,const R_Opnd *r,const Imm_Opnd *imm) {
    char *end = imul(inst,r,r,imm);
    return end;
}
// Three-operand signed multiply, r = rm * imm.
// An immediate that fits in 8 bits uses opcode 0x6b + imm8; otherwise
// opcode 0x69 + imm32 is used.
// Returns the address just past the emitted instruction.
char *imul(char *inst,const R_Opnd *r,const RM_Opnd *rm,const Imm_Opnd *imm) {
    const bool short_imm = imm->is_imm8();
    *inst++ = static_cast<char>(short_imm ? 0x6b : 0x69);
    char *after_operand = rm->emit(inst, r->reg_no());
    if (short_imm)
        return imm->emit8(after_operand);
    return imm->emit32(after_operand);
}
////////////////////////////////////////////////////////////////////////////////
//
// divide instructions: div, idiv
//
////////////////////////////////////////////////////////////////////////////////
// Divide EDX:EAX by rm (eax = quotient, edx = remainder).
// Opcode 0xf7 with opcode-extension field 6 (unsigned) or 7 (signed).
// Returns the address just past the emitted instruction.
char *div(char *inst,const RM_Opnd *rm,int is_signed) {
    const int extension = is_signed ? 7 : 6;
    *inst++ = static_cast<char>(0xf7);
    return rm->emit(inst, extension);
}
////////////////////////////////////////////////////////////////////////////////
//
// data movement: mov
//
////////////////////////////////////////////////////////////////////////////////
// Store a register to memory, m = r.
//   opnd_8  : opcode 0x88
//   opnd_16 : 0x66 operand-size prefix, then opcode 0x89
//   opnd_32 : opcode 0x89
// For any other size the opcode byte is left unwritten.
// When is_lo_reg is false the register field is the register number + 4
// (in the byte-register name table of this file, entries 4..7 are
// ah/ch/dh/bh).
// Returns the address just past the emitted instruction.
char *mov(char *inst,const M_Opnd *m,const R_Opnd *r,X86_Opnd_Size sz,bool is_lo_reg) {
    if (sz == opnd_8) {
        *inst = static_cast<char>(0x88);
    } else if (sz == opnd_16 || sz == opnd_32) {
        if (sz == opnd_16)
            *inst++ = static_cast<char>(0x66);
        *inst = static_cast<char>(0x89);
    }

    unsigned reg_field = r->reg_no();
    if (!is_lo_reg)
        reg_field += 4;
    return m->emit(inst + 1, reg_field);
}
// Load a register, r = rm.
//   opnd_8  : opcode 0x8a
//   opnd_16 : 0x66 operand-size prefix, then opcode 0x8b
//   opnd_32 : opcode 0x8b
// For any other size the opcode byte is left unwritten.
// Returns the address just past the emitted instruction.
char *mov(char *inst,const R_Opnd *r,const RM_Opnd *rm,X86_Opnd_Size sz) {
    if (sz == opnd_8) {
        *inst = static_cast<char>(0x8a);
    } else if (sz == opnd_16 || sz == opnd_32) {
        if (sz == opnd_16)
            *inst++ = static_cast<char>(0x66);
        *inst = static_cast<char>(0x8b);
    }
    return rm->emit(inst + 1, r->reg_no());
}
// Load a 32-bit immediate, r = imm: opcode 0xb8 + register number, then
// imm32.  The xor-with-self shortcut for a zero immediate is deliberately
// compiled out (see the #if 0 block).
// Returns the address just past the emitted instruction.
char *mov(char *inst,const R_Opnd *r,const Imm_Opnd *imm) {
#if 0 // unsafe for high operand of a long add/sub
	//:: Turned off for long add
	if (imm->value == 0)
		return alu(inst,xor_opc,r,r);
#endif // 0
    *inst++ = static_cast<char>(0xb8 + r->reg_no());
    return imm->emit32(inst);
}
char *mov_imm32(char *inst,const R_Opnd *r,const unsigned imm32) {
	*inst = (unsigned char)0xb8 + r->reg_no();
	Imm_Opnd imm(imm32);
	return imm.emit32(inst+1);
}
// Store an immediate to memory, m{8,16,32} = imm{8,16,32}.
//   opnd_8  : opcode 0xc6, opcode-extension field 0, imm8
//   opnd_16 : 0x66 operand-size prefix, opcode 0xc7, extension 0, imm16
//   other   : opcode 0xc7, extension 0, imm32
// Returns the address just past the emitted instruction.
char *mov(char *inst,const M_Opnd *m,const Imm_Opnd *imm,X86_Opnd_Size sz) {
    if (sz == opnd_8) {
        *inst++ = static_cast<char>(0xc6);
        char *after_operand = m->emit(inst, 0);
        return imm->emit8(after_operand);
    }
    if (sz == opnd_16) {
        *inst++ = static_cast<char>(0x66);
        *inst++ = static_cast<char>(0xc7);
        char *after_operand = m->emit(inst, 0);
        return imm->emit16(after_operand);
    }
    // everything else is treated as a 32-bit store
    *inst++ = static_cast<char>(0xc7);
    char *after_operand = m->emit(inst, 0);
    return imm->emit32(after_operand);
}
////////////////////////////////////////////////////////////////////////////////
//
// load effective address: lea
//
////////////////////////////////////////////////////////////////////////////////
// lea r, m: opcode 0x8d followed by the operand bytes from m->emit() with
// r's register number.
// Returns the address just past the emitted instruction.
char *lea(char *inst,const R_Opnd *r,const M_Opnd *m) {
    *inst++ = static_cast<char>(0x8d);
    return m->emit(inst, r->reg_no());
}
////////////////////////////////////////////////////////////////////////////////
//
// conversions, i.e., widening instructions
//
////////////////////////////////////////////////////////////////////////////////
// Widening load r = extend(rm).  The second opcode byte starts at 0xb6 and
// gets +0x08 when is_signed is non-zero and +0x01 when is_half is non-zero:
//   0x0f 0xb6  unsigned, byte source     0x0f 0xb7  unsigned, half source
//   0x0f 0xbe  signed, byte source       0x0f 0xbf  signed, half source
// Returns the address just past the emitted instruction.
char *widen(char *inst,const R_Opnd *r,const RM_Opnd *rm,
			unsigned is_signed,unsigned is_half) {
    const unsigned char second_byte =
        0xb6 + (is_signed ? 0x08 : 0x00) + (is_half ? 0x01 : 0x00);
    *inst++ = 0x0f;
    *inst++ = second_byte;
    return rm->emit(inst, r->reg_no());
}
// Emits the one-byte cdq (0x99) and returns the address just past it.
char *cdq(char *inst) {
    *inst++ = static_cast<char>(0x99);
    return inst;
}
// Emits the one-byte wait (0x9b) and returns the address just past it.
char *wait(char *inst) {
    *inst++ = static_cast<char>(0x9b);
    return inst;
}
////////////////////////////////////////////////////////////////////////////////
//
// floating-point instructions
//
////////////////////////////////////////////////////////////////////////////////
//
//		st(0) = st(0) fp_op m{32,64}real
//
// st(0) = st(0) <opc> memory operand.
// Opcode 0xdc when is_double is non-zero, 0xd8 otherwise; opc goes into the
// opcode-extension field.
// Returns the address just past the emitted instruction.
char *fp_op_mem(char *inst,X86_FP_Opcode opc,const M_Opnd *mem,int is_double) {
    *inst++ = static_cast<char>(is_double ? 0xdc : 0xd8);
    return mem->emit(inst, (unsigned)opc);
}
//
//		st(0) = st(0) fp_op st(i)
//
// st(0) = st(0) <opc> st(i): 0xd8 followed by 0xc0 + (opc<<3) + (i & 7).
// Returns the address just past the two emitted bytes.
char *fp_op(char *inst,X86_FP_Opcode opc,unsigned i) {
    const unsigned stack_slot = i & 0x07;
    *inst++ = static_cast<char>(0xd8);
    *inst++ = static_cast<char>(0xc0 + (opc << 3) + stack_slot);
    return inst;
}
//
//		st(i) = st(i) fp_op st(0)	; optionally pop stack
//
// st(i) = st(i) <opc> st(0), optionally popping the FP stack.
// First byte is 0xde when pop_stk is non-zero, 0xdc otherwise.
// In this operand direction the second byte swaps fsub/fsubr and
// fdiv/fdivr, so opc is translated through a fix-up table (4<->5, 6<->7)
// before being placed in the byte.
// Returns the address just past the two emitted bytes.
char *fp_op(char *inst,X86_FP_Opcode opc,unsigned i,unsigned pop_stk) {
    static unsigned char reversed_opc[] = { 0, 1, 2, 3, 5, 4, 7, 6, 8};
    const unsigned stack_slot = i & 0x07;
    *inst++ = static_cast<char>(pop_stk ? 0xde : 0xdc);
    *inst++ = static_cast<char>(0xc0 + (reversed_opc[opc] << 3) + stack_slot);
    return inst;
}

// fstp st(i): store st(0) into st(i) and pop the FP stack.
// Encoded as 0xdd followed by 0xd8 + i.
//   inst  where to write the two instruction bytes
//   i     FP stack slot; only the low three bits are used
// Returns the address just past the two emitted bytes.
char *fstp(char *inst,unsigned i) {
	inst[0] = (unsigned char)0xdd;
	// Mask the slot the same way fxch, fld(i), fcomip and fp_op do, so that
	// an out-of-range index cannot spill into the opcode bits and silently
	// turn this into a different instruction.
	inst[1] = (unsigned char)0xd8+(i&0x07);
	return inst+2;
}

// Emits fcompp (0xde 0xd9) and returns the address just past it.
char *fcompp(char *inst) {
    *inst++ = static_cast<char>(0xde);
    *inst++ = static_cast<char>(0xd9);
    return inst;
}
// Emits fnstsw ax (0xdf 0xe0), which stores the FPU status word into AX.
// Returns the address just past the two emitted bytes.
char *fnstsw(char *inst){
    *inst++ = static_cast<char>(0xdf);
    *inst++ = static_cast<char>(0xe0);
    return inst;
}
// fnstcw mem: stores the FPU control word into memory.
// Opcode 0xd9 with opcode-extension field 7.
// Returns the address just past the emitted instruction.
char *fnstcw(char *inst,const M_Opnd *mem) {
    *inst++ = static_cast<char>(0xd9);
    return mem->emit(inst, 7);
}
// fldcw mem: loads the FPU control word from memory.
// Opcode 0xd9 with opcode-extension field 5.
// Returns the address just past the emitted instruction.
char *fldcw(char *inst,const M_Opnd *mem) {
    *inst++ = static_cast<char>(0xd9);
    return mem->emit(inst, 5);
}

// Emits fchs (0xd9 0xe0) and returns the address just past it.
char *fchs(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xe0);
    return inst;
}
// Emits the two bytes 0xd9 0xf8 (the same bytes math_fprem writes) and
// returns the address just past them.
char *frem(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xf8);
    return inst;
}
// fxch st(i): 0xd9 followed by 0xc8 + (i & 7).
// Returns the address just past the two emitted bytes.
char *fxch(char *inst,unsigned i) {
    const unsigned stack_slot = i & 0x07;
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xc8 + stack_slot);
    return inst;
}
// fcomip st(0), st(i): 0xdf followed by 0xf0 + (i & 7).
// Returns the address just past the two emitted bytes.
char *fcomip(char *inst, unsigned i) {
    const unsigned stack_slot = i & 0x07;
    *inst++ = static_cast<char>(0xdf);
    *inst++ = static_cast<char>(0xf0 + stack_slot);
    return inst;
}
//
// load from memory (as fp) into fp register stack
//
// Push a floating-point value from memory onto the FP register stack.
// Opcode 0xdd when is_double is non-zero, 0xd9 otherwise; opcode-extension
// field 0.
// Returns the address just past the emitted instruction.
char *fld(char *inst,const M_Opnd *mem,int is_double) {
    *inst++ = static_cast<char>(is_double ? 0xdd : 0xd9);
    return mem->emit(inst, 0);
}
//
// load m80real from memory
//
// Push an 80-bit real from memory: opcode 0xdb, opcode-extension field 5.
// Returns the address just past the emitted instruction.
char *fld80(char *inst,const M_Opnd *mem) {
    *inst++ = static_cast<char>(0xdb);
    return mem->emit(inst, 5);
}

//
// load from memory (as int) into fp register stack
//
// Push an integer from memory onto the FP register stack.
//   is_long != 0 : opcode 0xdf, opcode-extension field 5
//   is_long == 0 : opcode 0xdb, opcode-extension field 0
// Returns the address just past the emitted instruction.
char *fild(char *inst,const M_Opnd *mem,int is_long) {
    const bool wide = (is_long != 0);
    *inst++ = static_cast<char>(wide ? 0xdf : 0xdb);
    return mem->emit(inst, wide ? 5 : 0);
}
//
// push st(i) onto fp register stack
//
// fld st(i): 0xd9 followed by 0xc0 + (i & 7).
// Returns the address just past the two emitted bytes.
char *fld(char *inst,unsigned i) {
    const unsigned stack_slot = i & 0x07;
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xc0 + stack_slot);
    return inst;
}
//
// push the constants 0.0 and 1.0 onto the fp register stack
//
// Emits fldz (0xd9 0xee), which pushes 0.0, and returns the address just
// past it.
char *fldz(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xee);
    return inst;
}
// Emits fld1 (0xd9 0xe8), which pushes 1.0, and returns the address just
// past it.
char *fld1(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xe8);
    return inst;
}

//
// store stack to memory (as fp), optionally popping the stack
//
// Store st(0) to memory as a floating-point value.
// Opcode 0xdd when is_double is non-zero, 0xd9 otherwise; the
// opcode-extension field is 3 when pop_stk is non-zero and 2 otherwise.
// Returns the address just past the emitted instruction.
char *fst(char *inst,const M_Opnd *mem,int is_double,unsigned pop_stk) {
    const unsigned extension = pop_stk ? 3u : 2u;
    *inst++ = static_cast<char>(is_double ? 0xdd : 0xd9);
    return mem->emit(inst, extension);
}
//
// store stack to memory (as int), always popping the stack
//
// Store st(0) to memory as an integer and pop the FP stack.
//   is_long != 0 : opcode 0xdf, opcode-extension field 7
//   is_long == 0 : opcode 0xdb, opcode-extension field 3
// Returns the address just past the emitted instruction.
char *fist_pop(char *inst,const M_Opnd *mem,int is_long) {
    const bool wide = (is_long != 0);
    *inst++ = static_cast<char>(wide ? 0xdf : 0xdb);
    return mem->emit(inst, wide ? 7 : 3);
}
////////////////////////////////////////////////////////////////////////////////
//
// stack push and pop instructions
//
////////////////////////////////////////////////////////////////////////////////
// push of a memory operand: opcode 0xff with opcode-extension field 6.
// Returns the address just past the emitted instruction.
char *push(char *inst,const M_Opnd *rm) {
    *inst++ = static_cast<char>(0xff);
    return rm->emit(inst, 6);
}
// push of a 32-bit immediate: opcode 0x68 followed by imm32.
// Returns the address just past the emitted instruction.
char *push(char *inst,const Imm_Opnd *imm) {
    *inst++ = static_cast<char>(0x68);
    return imm->emit32(inst);
}
// push of a register: the single byte 0x50 + register number.
// Returns the address just past it.
char *push(char *inst,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x50 + r->reg_no());
    return inst;
}
// pop into a register: the single byte 0x58 + register number.
// Returns the address just past it.
char *pop(char *inst,const R_Opnd *r) {
    *inst++ = static_cast<char>(0x58 + r->reg_no());
    return inst;
}
// pop into a memory operand: opcode 0x8f with opcode-extension field 0.
// Returns the address just past the emitted instruction.
char *pop(char *inst,const M_Opnd *mem) {
    *inst++ = static_cast<char>(0x8f);
    return mem->emit(inst, 0);
}
// Emits the one-byte pushad (0x60) and returns the address just past it.
char *pushad(char *inst) {
    *inst++ = static_cast<char>(0x60);
    return inst;
}
// Emits the one-byte pushfd (0x9c) and returns the address just past it.
char *pushfd(char *inst) {
    *inst++ = static_cast<char>(0x9c);
    return inst;
}
// Emits the one-byte popad (0x61) and returns the address just past it.
char *popad(char *inst) {
    *inst++ = static_cast<char>(0x61);
    return inst;
}
// Emits the one-byte popfd (0x9d) and returns the address just past it.
char *popfd(char *inst) {
    *inst++ = static_cast<char>(0x9d);
    return inst;
}

////////////////////////////////////////////////////////////////////////////////
//
// control-flow instructions
//
////////////////////////////////////////////////////////////////////////////////
//
// jump with 32-bit relative
//
// Unconditional jump, long form: opcode 0xe9 followed by a 32-bit
// immediate.  The immediate is emitted exactly as given.
// Returns the address just past the emitted instruction.
char *jump32(char *inst,const Imm_Opnd *imm) {
    *inst++ = static_cast<char>(0xe9);
    return imm->emit32(inst);
}
//
// jump with 8-bit relative
//
// Unconditional jump, short form: opcode 0xeb followed by an 8-bit
// immediate.  The immediate is emitted exactly as given.
// Returns the address just past the emitted instruction.
char *jump8(char *inst,const Imm_Opnd *imm) {
    *inst++ = static_cast<char>(0xeb);
    return imm->emit8(inst);
}
//
// indirect jump
//
// Indirect jump through a register or memory operand: opcode 0xff with
// opcode-extension field 4.
// Returns the address just past the emitted instruction.
char *jump(char *inst,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0xff);
    return rm->emit(inst, 4);
}
//
// jump to target address
//
char *jump(char *inst,char *target) {
	// sub 2 bytes for the short version
	Imm_Opnd imm(target - inst - 2);
	if (imm.is_imm8()) {
		//
		// use 8-bit signed relative form
		//
		return jump8(inst,&imm);
	} else {
		//
		// use 32-bit signed relative form
		//
		imm.value -= 3; // 3 more bytes for the long version
		return jump32(inst,&imm);
	}
}

//
// jump with displacement
//
char *jump(char *inst,int disp) {
	// sub 2 bytes for the short version
	Imm_Opnd imm(disp - 2);
	if (imm.is_imm8()) {
		//
		// use 8-bit signed relative form
		//
		return jump8(inst,&imm);
	} else {
		//
		// use 32-bit signed relative form
		//
		imm.value -= 3; // 3 more bytes for the long version
		return jump32(inst,&imm);
	}
}

// Short-form (8-bit offset) conditional-branch opcodes used for unsigned
// comparisons, indexed by X86_CC.  branch32() adds 0x10 to an entry to get
// the second byte of the long form, and cmov() subtracts 0x30 to get the
// second byte of the conditional move.
static unsigned char cc_unsigned_map[n_cc] = {
	0x74, // eq
	0x75, // ne
	0x72, // lt
	0x76, // le
	0x77, // gt
	0x73, // ge
	0x78, // lz
	0x79, // gez
    0x7a, // p
    0x7b, // np
};

// Short-form (8-bit offset) conditional-branch opcodes used for signed
// comparisons, indexed by X86_CC.  Only the lt/le/gt/ge entries differ from
// cc_unsigned_map.
static unsigned char cc_signed_map[n_cc] = {
	0x74, // eq
	0x75, // ne
	0x7c, // lt
	0x7e, // le
	0x7f, // gt
	0x7d, // ge
	0x78, // lz
	0x79, // gez
    0x7a, // p
    0x7b, // np
};

// Maps a condition code to the one that holds after the two compared
// operands are swapped (lt <-> gt, le <-> ge; eq and ne are unchanged).
// NOTE(review): only the first six entries are listed.  The opcode tables
// in this file have ten entries, so the remaining slots of this array are
// zero-initialized; whatever X86_CC value is 0 is what a lookup of
// lz/gez/p/np returns.  Confirm that callers never index those slots.
X86_CC cc_commute_map[n_cc] = {
	cc_eq,	// cc_eq
	cc_ne,	// cc_ne
	cc_gt,	// cc_lt
	cc_ge,	// cc_le
	cc_lt,	// cc_gt
	cc_le,	// cc_ge
};

//
// conditional branch with 8-bit branch offset
//
// Conditional branch, short form: one opcode byte taken from
// cc_signed_map or cc_unsigned_map (chosen by is_signed), followed by an
// 8-bit immediate emitted exactly as given.
// Returns the address just past the emitted instruction.
char *branch8(char *inst,X86_CC cc,const Imm_Opnd *imm,unsigned is_signed) {
    const unsigned char opcode = is_signed ? cc_signed_map[cc] : cc_unsigned_map[cc];
    *inst++ = opcode;
    return imm->emit8(inst);
}
//
// conditional branch with 32-bit branch offset
//
// Conditional branch, long form: 0x0f, then the short-form opcode for cc
// plus 0x10, then a 32-bit immediate emitted exactly as given.
// Returns the address just past the emitted instruction.
char *branch32(char *inst,X86_CC cc,const Imm_Opnd *imm,unsigned is_signed) {
    const unsigned char short_opcode = is_signed ? cc_signed_map[cc] : cc_unsigned_map[cc];
    *inst++ = static_cast<char>(0x0f);
    *inst++ = short_opcode + 0x10;
    return imm->emit32(inst);
}
//
// conditional branch
//
char *branch(char *inst,X86_CC cc,char *target,unsigned is_signed) {
	// subtract 2 bytes for the short version
	Imm_Opnd imm(target - inst - 2);
	if (imm.is_imm8()) {
		//
		// use 8-bit signed relative form
		//
		return branch8(inst,cc,&imm,is_signed);
	} else {
		//
		// use 32-bit signed relative form
		//
		imm.value -= 4; // 4 more bytes for the long version
		return branch32(inst,cc,&imm,is_signed);
	}
}
//
// conditional branch with displacement immediate
//
char *branch(char *inst,X86_CC cc,int disp,unsigned is_signed) {
	// subtract 2 bytes for the short version
	Imm_Opnd imm(disp - 2);
	if (imm.is_imm8()) {
		//
		// use 8-bit signed relative form
		//
		return branch8(inst,cc,&imm,is_signed);
	} else {
		//
		// use 32-bit signed relative form
		//
		imm.value -= 4; // 4 more bytes for the long version
		return branch32(inst,cc,&imm,is_signed);
	}
}
//
// call with displacement
//
// Relative call: opcode 0xe8 followed by a 32-bit immediate emitted exactly
// as given.
// Returns the address just past the emitted instruction.
char *call(char *inst,const Imm_Opnd *imm) {
    *inst++ = static_cast<char>(0xe8);
    return imm->emit32(inst);
}
//
// indirect call through register or memory location
//
// Indirect call through a register or memory operand: opcode 0xff with
// opcode-extension field 2.
// Returns the address just past the emitted instruction.
char *call(char *inst,const RM_Opnd *rm) {
    *inst++ = static_cast<char>(0xff);
    return rm->emit(inst, 2);
}
//
// call target address
//
char *call(char *inst,char *target) {
	int offset = target - inst;
	offset -= 5; // sub 5 bytes for this instruction
	return call(inst,&Imm_Opnd(offset));
}
//
// return instruction
//
// Emits the one-byte near return (0xc3) and returns the address just past
// it.
char *ret(char *inst) {
    *inst++ = static_cast<char>(0xc3);
    return inst;
}

// Return with an immediate operand: opcode 0xc2 followed by a 16-bit
// immediate.  An immediate of 0 falls back to the plain one-byte return.
// Returns the address just past the emitted instruction.
char *ret(char *inst,Imm_Opnd *imm) {
    if (imm->value != 0) {
        *inst++ = static_cast<char>(0xc2);
        return imm->emit16(inst);
    }
    return ret(inst);
}

//////////////////////////////
// conditional move for PPro
//////////////////////////////
// Conditional move r = rm when condition cc holds.
// Emits 0x0f, then the short-form branch opcode for cc minus 0x30 (taken
// from the signed or unsigned table according to is_signed), then the
// operand bytes from rm->emit() with r's register number.
// Returns the address just past the emitted instruction.
char *cmov(char *inst, X86_CC cc, unsigned is_signed, const R_Opnd *r,const RM_Opnd *rm)
{
    const unsigned char branch_opcode = is_signed ? cc_signed_map[cc] : cc_unsigned_map[cc];
    *inst++ = (unsigned char) 0x0f;
    *inst++ = branch_opcode - 0x30;
    return rm->emit(inst, r->reg_no());
}


////////////////////////////////////////////////////////////////////////////////
//
// stack frame allocation instructions: enter & leave
//
////////////////////////////////////////////////////////////////////////////////
//
//	enter frame_size 
//
//	is equivalent to:
//
//	push	ebp
//	mov		ebp,esp
//	sub		esp,frame_size
//
// enter imm16, 0: opcode 0xc8, the frame size as a 16-bit immediate, then a
// zero byte.  The caller must make sure imm fits in 16 bits.
// Returns the address just past the emitted instruction.
char *enter(char *inst,const Imm_Opnd *imm) {
    *inst++ = static_cast<char>(0xc8);
    inst = imm->emit16(inst);
    *inst++ = 0;
    return inst;
}
//
// leave
//
// is equivalent to:
//
// mov		esp,ebp
// pop		ebp
//
// Emits the one-byte leave (0xc9) and returns the address just past it.
char *leave(char *inst) {
    *inst++ = static_cast<char>(0xc9);
    return inst;
}
//
// sahf
//
// Emits the one-byte sahf (0x9e) and returns the address just past it.
char *sahf(char *inst) {
    *inst++ = static_cast<char>(0x9e);
    return inst;
}

// Emits fsin (0xd9 0xfe) and returns the address just past it.
char *math_fsin(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xfe);
    return inst;
}

// Emits fcos (0xd9 0xff) and returns the address just past it.
char *math_fcos(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xff);
    return inst;
}

// Emits fabs (0xd9 0xe1) and returns the address just past it.
char *math_fabs(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xe1);
    return inst;
}

// Emits fpatan (0xd9 0xf3) and returns the address just past it.
char *math_fpatan(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xf3);
    return inst;
}

// Emits fprem (0xd9 0xf8) and returns the address just past it.
char *math_fprem(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xf8);
    return inst;
}

// Emits fprem1 (0xd9 0xf5) and returns the address just past it.
char *math_fprem1(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xf5);
    return inst;
}

// Emits frndint (0xd9 0xfc) and returns the address just past it.
char *math_frndint(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xfc);
    return inst;
}

// Emits fsqrt (0xd9 0xfa) and returns the address just past it.
char *math_fsqrt(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xfa);
    return inst;
}

// Emits fptan (0xd9 0xf2) and returns the address just past it.
char *math_fptan(char *inst) {
    *inst++ = static_cast<char>(0xd9);
    *inst++ = static_cast<char>(0xf2);
    return inst;
}

// Fills exactly num bytes at inst with instructions that change no program
// state, and returns the address just past them.
//
//   inst  where to start writing
//   num   number of filler bytes wanted; 0 writes nothing
//
// Requests of 1..7 bytes produce the same byte sequences as before.  Larger
// requests, which previously wrote nothing and returned inst unchanged, are
// now satisfied by chaining 7-byte fillers followed by one filler for the
// remainder, so the returned pointer is always inst + num.
char *padding(char *inst, unsigned num)
{
    // Row k holds the k-byte filler; unused trailing slots are zero.
    static const unsigned char fillers[8][7] = {
        { 0x00 },                                       // 0: nothing
        { 0x90 },                                       // 1: nop
        { 0x8b, 0xc0 },                                 // 2: mov eax, eax
        { 0x8d, 0x6d, 0x00 },                           // 3: lea ebp, [ebp+disp8 0]
        { 0x8d, 0x64, 0x24, 0x00 },                     // 4: lea esp, [esp+disp8 0]
        { 0x8d, 0x64, 0x24, 0x00, 0x90 },               // 5: the 4-byte form + nop
        { 0x8d, 0xad, 0x00, 0x00, 0x00, 0x00 },         // 6: lea ebp, [ebp+disp32 0]
        { 0x8d, 0xa4, 0x24, 0x00, 0x00, 0x00, 0x00 }    // 7: lea esp, [esp+disp32 0]
    };
    const unsigned longest = 7;

    while (num != 0) {
        const unsigned chunk = (num > longest) ? longest : num;
        for (unsigned k = 0; k < chunk; k++)
            *inst++ = (char)fillers[chunk][k];
        num -= chunk;
    }
    return inst;
}
////////////////////////////////////////////////////////////////////////////////
//
// prolog and epilog code generation
//
////////////////////////////////////////////////////////////////////////////////
char *prolog(char *inst,unsigned frame_size,unsigned reg_mask) {
	//
	// enter	frame_size
	//
	inst = enter(inst,&Imm_Opnd(frame_size));
	//
	// push callee-save registers
	//
	unsigned m = 1;
	for (unsigned i = 0; i < n_reg; i++, m=m<<1) {
		if (reg_mask & m) 
			inst = push(inst,&R_Opnd((X86_Reg_No)i));
	}
	return inst;
}
char *epilog(char *inst,unsigned reg_mask) {	
	//
	// pop saved callee-saved registers
	//
	unsigned m = 1 << edi_reg;
	for (unsigned i = edi_reg; m != 0; i--, m=m>>1) {
		if (reg_mask & m) 
			inst = pop(inst,&R_Opnd((X86_Reg_No)i));
	}
	//
	// leave
	//
	inst = leave(inst);
	//
	// emit the return instruction
	//
	return ret(inst);
}
////////////////////////////////////////////////////////////////////////////////
//
// Instruction operands: R_Opnd, M_Opnd, M_Base_Opnd, M_Index_Opnd
//
////////////////////////////////////////////////////////////////////////////////

// Names of the 16-bit registers, in register-number order.  Each name is
// padded with a leading blank to three characters.
char *half_reg_strings[n_reg] = {
	" ax", " cx", " dx", " bx",
	" sp", " bp", " si", " di"
};

// Names of the 8-bit registers.  Entries 0..3 are the low bytes and
// entries 4..7 the high bytes (ah, ch, dh, bh).
char *byte_reg_strings[n_reg] = {
	" al", " cl", " dl", " bl",
	" ah", " ch", " dh", " bh"
};

// Names of the 32-bit registers, in register-number order.
char *reg_strings[n_reg] = {
	"eax", "ecx", "edx", "ebx", 
	"esp", "ebp", "esi", "edi"
};

// Register-name table per operand size, indexed by X86_Opnd_Size.
// The last slot is NULL: R_Opnd::print() indexes this table without a
// check, so it must not be called with the size that selects that slot.
char **reg_size_strings[n_opnd_size] = {
	byte_reg_strings,half_reg_strings,reg_strings,NULL
};

// Size keyword printed in front of a memory operand, indexed by
// X86_Opnd_Size.
char *mem_opnd_size_strings[n_opnd_size] = {
	"byte ptr", "word ptr", "dword ptr", "qword ptr"
};

//
// emit an rm mod or sib address specifier byte
//
// Packs three fields into one addressing byte and writes it at inst:
//   bits 7..6 = m & 3, bits 5..3 = o & 7, bits 2..0 = r & 7.
// The same layout serves both the ModR/M byte and the SIB byte.
// Returns the address just past the written byte.
char *address_byte(char *inst,unsigned m,unsigned o,unsigned r) {
    const unsigned top_field = (m & 0x03) << 6;
    const unsigned mid_field = (o & 0x07) << 3;
    const unsigned low_field = r & 0x07;
    *inst++ = static_cast<char>(top_field | mid_field | low_field);
    return inst;
}
// Emits the addressing byte for a register-direct operand: mode field 3,
// r in the middle field, this register's number in the low field.
// Returns the address just past the written byte.
char *R_Opnd::emit(char *inst,unsigned r) const {
    const unsigned register_direct_mode = 3;
    return address_byte(inst, register_direct_mode, r, _reg_no);
}
// Writes this register's name for operand size sz to the given stream.
// The name table for sz is used without a NULL check.
void R_Opnd::print(ostream& cout,X86_Opnd_Size sz) const {
    char **names_for_size = reg_size_strings[sz];
    cout << names_for_size[_reg_no];
}
// Emits an absolute-address memory operand: an addressing byte with mode
// field 0 and low field 5, followed by the 32-bit displacement.
// Returns the address just past the emitted bytes.
char *M_Opnd::emit(char *inst,unsigned r) const {
    char *after_modrm = address_byte(inst, 0, r, 5);
    return disp.emit32(after_modrm);
}
// Writes the operand as "<size keyword> [<displacement as a pointer>]".
void M_Opnd::print(ostream& cout,X86_Opnd_Size sz) const {
    cout << mem_opnd_size_strings[sz]
         << " [" << (void*)disp.value << "]";
}
// Emits the addressing bytes for [base_reg + disp].
//
// The mode field is chosen as follows:
//   0  no displacement  -- disp is 0 and the base is not ebp
//                          (an esp base may always omit a zero displacement)
//   1  8-bit displacement, when disp fits in 8 bits
//   2  32-bit displacement otherwise
// An esp base cannot be expressed in the first byte alone, so a second
// (SIB) byte with esp as both index and base fields is added for it.
// Returns the address just past the emitted bytes.
char *M_Base_Opnd::emit(char *inst,unsigned r) const {
    const bool base_is_esp = (base_reg == esp_reg);
    const bool zero_disp_allowed = base_is_esp || (base_reg != ebp_reg);

    unsigned mode;
    if (disp.value == 0 && zero_disp_allowed)
        mode = 0;
    else if (disp.is_imm8())
        mode = 1;
    else
        mode = 2;

    inst = address_byte(inst, mode, r, base_reg);
    if (base_is_esp)
        inst = address_byte(inst, 0, esp_reg, esp_reg);

    if (mode == 1)
        return disp.emit8(inst);
    if (mode == 2)
        return disp.emit32(inst);
    return inst;
}
// Writes the operand as "<size keyword> [<base>{+|-}<disp>]".
// The displacement is printed when it is non-zero or when the base is ebp;
// non-negative values get an explicit "+".
void M_Base_Opnd::print(ostream& cout,X86_Opnd_Size sz) const {
    cout << mem_opnd_size_strings[sz] << " [" << reg_strings[base_reg];
    const bool show_disp = (disp.value != 0) || (base_reg == ebp_reg);
    if (show_disp) {
        int val = disp.value;
        cout << (val<0?"":"+") << val;
    }
    cout << "]";
}
// Emits the addressing bytes for [base_reg + index_reg<<shift_amount + disp].
// The first byte always has low field 4 (a second byte follows); the second
// byte carries shift_amount, index_reg and base_reg.
//
// The mode field is chosen as follows:
//   0  no displacement  -- disp is 0 and the base is not ebp
//   1  8-bit displacement, when disp fits in 8 bits
//   2  32-bit displacement otherwise
// Returns the address just past the emitted bytes.
char *M_Index_Opnd::emit(char *inst,unsigned r) const {
    unsigned mode;
    if (disp.value == 0 && base_reg != ebp_reg)
        mode = 0;
    else if (disp.is_imm8())
        mode = 1;
    else
        mode = 2;

    inst = address_byte(inst, mode, r, 4);
    inst = address_byte(inst, shift_amount, index_reg, base_reg);

    if (mode == 1)
        return disp.emit8(inst);
    if (mode == 2)
        return disp.emit32(inst);
    return inst;
}
// Writes the operand as "<size keyword> [<base>+<index>*<scale>{+|-}<disp>]".
// The index part is omitted when index_reg is esp_reg, the "*<scale>" part
// when shift_amount is 0, and the displacement when it is 0.
void M_Index_Opnd::print(ostream& cout,X86_Opnd_Size sz) const {
    cout << mem_opnd_size_strings[sz] << " [" << reg_strings[base_reg];
    const bool has_index = (index_reg != esp_reg);
    if (has_index) {
        cout << "+" << reg_strings[index_reg];
        if (shift_amount) {
            unsigned scale = 1<<shift_amount;
            cout << "*" << scale;
        }
    }
    int val = disp.value;
    if (val)
        cout << (val<0?"":"+") << val;
    cout << "]";
}
