// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o1_jit/cg_mul_div_mod.cpp,v 1.3 2001/10/24 07:48:09 zying1 Exp $
//

#include "defines.h"
#include "jit_intf.h"
#include "jit_runtime_support.h"
#include <iostream.h>
#include "code_emitter.h"
#include "stack.h"
#include "operand.h"
#include "profiling.h"
#include "lazy_code_selector.h"
#include "cg_mul_div_mod.h"
#include "fp_compatibility.h"

//
// Defined elsewhere.  In this file it is called right before emitting an
// instruction that overwrites dst.  dst and src are taken by reference, so
// the call may replace either pointer.  After the call the callers here
// treat dst as a register operand (they either assert
// dst->kind == Operand::Reg or cast dst to Reg_Operand).
// NOTE(review): is_commutable presumably tells the routine that dst and src
// may be exchanged -- confirm against the definition.
//
extern void make_dst_killable(Mem_Manager& mem_manager,
							  Code_Emitter& emitter, Stack& stack, 
							  Operand*& dst, Operand*& src,int is_commutable);


//
// Returns the bit position n such that v == (1 << n) when v is a positive
// power of two (exactly one bit set); returns -1 for every other value,
// including zero and negative numbers.
//
static int is_power2(int v) {
	// zero and negative values are never reported as powers of two
	if (v <= 0)
		return -1;
	// clearing the lowest set bit must leave nothing behind
	if ((v & (v - 1)) != 0)
		return -1;
	// count how far the single set bit is from bit 0
	int bit_pos = 0;
	while ((v >>= 1) != 0)
		++bit_pos;
	return bit_pos;
}
//
// Emit code for a 32-bit integer multiply.  The two operands are popped
// from the operand stack and the operand holding the product is pushed back.
//
// If one of the operands is an immediate whose value is a positive power of
// two, the multiply is emitted as a left shift of the other operand by
// log2(value).  Otherwise an imul instruction is emitted.
//
void gen_imul(Mem_Manager& mem_manager,Code_Emitter& emitter,Stack& stack) {
	Operand *src = stack.pop();
	Operand *dst = stack.pop();
	if (src->kind == Operand::Imm || dst->kind == Operand::Imm) {
		if (dst->kind == Operand::Imm) { // swap dst and src so that src is the immediate
			Operand *t = src; src = dst; dst = t;
		}
		int val = ((Imm_Operand*)src)->imm_opnd.value;
		int n_shift;
		//
		// generate more efficient code if the multiplier is a power of 2
		//
		if (val > 0 && (n_shift = is_power2(val)) != -1) {
			//
			// src must stay the immediate here, so the operands are passed
			// as not commutable
			//
			make_dst_killable(mem_manager,emitter,stack,dst,src,0);
			//
			// The shift count lives in a named local: taking the address of
			// a temporary (&Imm_Operand(n_shift)) is not valid standard C++
			// and is only accepted by some compilers as an extension.
			//
			Imm_Operand shift_count(n_shift);
			dst->emit_shift(emitter,shl_opc,&shift_count);
			stack.push(dst);
			return;
		}
	}
	//
	// general case: dst = dst * src.  Multiplication is commutative, so
	// make_dst_killable is told the operands may be exchanged.  After the
	// call dst is used as a register operand (cast to Reg_Operand below).
	//
	make_dst_killable(mem_manager,emitter,stack,dst,src,1); // commutable
	src->emit_imul_inst(emitter,&((Reg_Operand*)dst)->opnd);
	src->free_opnd(&stack.reg_manager);
	stack.push(dst);
}

//
// Emit a call to the runtime helper selected by help_func for a binary
// operation on two 64-bit values taken from the operand stack.
//
// The four 32-bit halves are pushed as outgoing arguments (an esp record is
// made after every push), the call is emitted, a call-site entry is filled
// in method_info->cs_info, and a Call_Patch for the call is prepended to
// code_patch_list.  The 64-bit result is pushed back on the operand stack as
// the register pair eax (first argument of push64) and edx (second argument).
//
// frame is not used by this routine.
//
void gen_long_help_func(Mem_Manager& mem_manager,
			  Code_Emitter& emitter,Stack& stack,
			  Code_Patch *& code_patch_list,
			  Frame& frame,
			  ORP_RT_SUPPORT help_func,
			  Jit_Method_Info *method_info) {
	stack.call_home(4);
	//
	// the value popped first is the one on top of the operand stack
	//
	Operand *rhs_lo, *rhs_hi;
	Operand *lhs_lo, *lhs_hi;
	stack.pop64(rhs_lo,rhs_hi);
	stack.pop64(lhs_lo,lhs_hi);
	//
	// push the four halves; the second argument of make_esp_record is the
	// number of words pushed so far
	//
	rhs_hi->emit_push(emitter);
	make_esp_record(emitter.get_offset(),1,method_info,mem_manager);
	rhs_lo->emit_push(emitter,1);
	make_esp_record(emitter.get_offset(),2,method_info,mem_manager);
	lhs_hi->emit_push(emitter,2);
	make_esp_record(emitter.get_offset(),3,method_info,mem_manager);
	lhs_lo->emit_push(emitter,3);
	make_esp_record(emitter.get_offset(),4,method_info,mem_manager);

	//
	// pick the address of the helper to call
	//
	void *addr;
#ifdef ORP_LONG_OPT
	//
	// a long multiply whose popped-first operand has an immediate high word
	// equal to zero and an immediate low word uses the dedicated
	// constant-multiplier helper
	//
	if (help_func == ORP_RT_LMUL &&
		rhs_hi->kind == Operand::Imm &&
		((Imm_Operand*)rhs_hi)->imm_opnd.value == 0 &&
		rhs_lo->kind == Operand::Imm)
		addr = orp_get_rt_support_addr(ORP_RT_LMUL_CONST_MULTIPLIER);
	else
#endif
		addr = orp_get_rt_support_addr(help_func);

	//
	// emit the call and describe the call site
	//
	const unsigned call_offset = (unsigned)emitter.get_offset();
	method_info->cs_info[method_info->cnt].precall_IP = call_offset;
	emitter.emit_call((char *)addr);
	method_info->cs_info[method_info->cnt].ret_IP = emitter.get_offset();
	method_info->cs_info[method_info->cnt].returns_ref = 0;
	method_info->cs_info[method_info->cnt].outarg_bv = 0;
	method_info->cs_info[method_info->cnt].m_handle = NULL;
	method_info->cs_info[method_info->cnt].num_out_args = 4;
	method_info->cnt++;
	//
	// the patch is recorded one byte past the start of the call instruction
	//
	code_patch_list =
		new(mem_manager) Call_Patch(code_patch_list,call_offset + 1,(char*)addr);

	//
	// release the argument operands and push the eax/edx result pair
	//
	Register_Manager *regs = &stack.reg_manager;
	rhs_lo->free_opnd(regs);
	rhs_hi->free_opnd(regs);
	lhs_lo->free_opnd(regs);
	lhs_hi->free_opnd(regs);
	stack.push64(stack.reg_manager.get_reg(eax_reg),
				 stack.reg_manager.get_reg(edx_reg));
}
//
// Emit code computing dividen % val without idiv, for the case where val is
// a positive power of two (the caller passes n_shift == log2(val)).  The
// operand holding the remainder is pushed on the operand stack.
//
// The low bits (dividen & (val-1)) are the remainder when the dividend is
// positive.  When the dividend is not positive and the low bits are not
// zero, the upper bits are filled in (or with -val) so that the result is
// the negative remainder.
//
// e.g. x%32
//   mov ecx, eax
//   and eax, 0x1f
//   cmp ecx, 0
//   jg  _done
//   test eax, eax  --- test if rem is not zero
//   jz  _done
//   or  eax, 0xffffffe0
// _done:
//
// n_shift is only wrapped in an immediate operand that is handed to
// make_dst_killable; the masks themselves are derived from val.
//
void gen_irem_pow2(Mem_Manager& mem_manager,
				   Code_Emitter& emitter, Stack& stack,
				   Operand *dividen,  int val, int n_shift) {
	Operand *dst = dividen;
	Imm_Operand nn(n_shift);
	Operand *shf = &nn;
	make_dst_killable(mem_manager,emitter,stack,dst,shf,0); // not commutable
	assert(dst->kind == Operand::Reg);
	// reg receives a copy of the original dividend for the sign test below
	Reg_Operand *reg = stack.reg_manager.get_reg();
	Reg_Operand *dst_reg = (Reg_Operand*)dst;
	dst->emit_mov_to_reg(emitter,&reg->opnd);
	// dst = dst & (val - 1)
	Imm_Operand(val - 1).emit_alu_inst(emitter,&dst_reg->opnd,and_opc);
	//
	// test if dividen is positive or not
	//
	Imm_Operand(0).emit_alu_inst(emitter,&reg->opnd,cmp_opc);
	// jg _done, emitted with a zero target that is patched at the end
	emitter.emit_branch(cc_gt,0,1);
	// remember the offset right after the branch and the address of its last
	// byte (NOTE(review): presumably the 8-bit displacement -- confirm
	// against Code_Emitter::emit_branch)
	int patch_pos_off = emitter.get_offset();
	char *patch_pos = emitter.get_next() - 1;
	//
	// For statistics, inserting instruments
	//
	// NOTE(review): the counter address is computed by indexing past
	// prof_rec->back_edge[n_back_edge]; presumably the counters are laid out
	// after the back-edge table -- confirm against profiling.h
	//
	if(inner_statistics){
		assert(emitter.prof_rec) ;
		inner_bb_instrumenting_code(emitter,
			(unsigned*)&((PROF_COUNTER*)&((unsigned short*)&emitter.prof_rec->back_edge[emitter.prof_rec->n_back_edge])[emitter.prof_rec->n_back_edge])[emitter.inner_bb_cnt_offset++]);
	}
	//
	// test if rem is zero or not
	//
    emitter.emit_test(&dst_reg->opnd,&dst_reg->opnd);
	// jz _done, patched at the end like the branch above
	emitter.emit_branch(cc_eq,0,0);
	int patch_neg_off = emitter.get_offset();
	char *patch_neg = emitter.get_next() - 1;
	//
	// For statistics, inserting instruments
	//
	if(inner_statistics){
		assert(emitter.prof_rec) ;
		inner_bb_instrumenting_code(emitter,
			(unsigned*)&((PROF_COUNTER*)&((unsigned short*)&emitter.prof_rec->back_edge[emitter.prof_rec->n_back_edge])[emitter.prof_rec->n_back_edge])[emitter.inner_bb_cnt_offset++]);
	}
	// non-positive dividend with a non-zero remainder: dst = dst | -val
	Imm_Operand(-val).emit_alu_inst(emitter,&dst_reg->opnd,or_opc);
	reg->free_opnd(&stack.reg_manager);
	//
	// patching correct offset
	//
	// each stored value is the distance from the end of the branch to the
	// current offset (_done); it is narrowed to a char when stored
	//
	*patch_pos = emitter.get_offset() - patch_pos_off;
	*patch_neg = emitter.get_offset() - patch_neg_off;
	stack.push(dst_reg);

	//
	// For statistics
	//
	// NOTE(review): b_inner_counter is declared elsewhere; presumably it
	// records that inner counters were emitted -- confirm
	//
	b_inner_counter = true ;
}
// 
// Emit code computing dividen / val with shifts instead of idiv, for the
// case where val is a positive power of two (the caller passes
// n_shift == log2(val)).  The operand holding the quotient is pushed on the
// operand stack.
//
// A non-negative dividend is simply shifted right.  For a negative dividend
// the arithmetic shift rounds towards minus infinity, so 1 is added when
// the discarded low bits are not all zero.
//
// e.g. x/32
//     cmp eax, 0
//     jl  _negative
//     shr eax, 5
//     jmp _done
// _negative:
//     mov ecx,eax
//     sar eax, 5
//     and ecx, 0x1f
//     test ecx, ecx
//     jz   _done
//     add eax,1
// _done:
//
void gen_idiv_pow2(Mem_Manager& mem_manager,
				   Code_Emitter& emitter, Stack& stack,
				   Operand *dividen,  int val, int n_shift) {
	Operand *dst = dividen;
	// nn/shf is the shift-count operand used by both shifts below
	Imm_Operand nn(n_shift);
	Operand *shf = &nn;
	make_dst_killable(mem_manager,emitter,stack,dst,shf,0); // not commutable
	assert(dst->kind == Operand::Reg);
	// reg is a second register used to hold the low bits on the negative path
	Reg_Operand *reg = stack.reg_manager.get_reg();
	Reg_Operand *dst_reg = (Reg_Operand*)dst;
	Imm_Operand(0).emit_alu_inst(emitter,&dst_reg->opnd,cmp_opc);
	//
	// jl _negative (2 bytes); shr eax,5 (2 or 3 bytes); jmp _done (2 bytes)
	//
	emitter.emit_branch(cc_lt,0,1);
	// remember the offset right after the branch and the address of its last
	// byte (NOTE(review): presumably the 8-bit displacement -- confirm
	// against Code_Emitter::emit_branch)
	int neg_off = emitter.get_offset();
	char *neg_patch = emitter.get_next() - 1;
	//
	// For statistics, inserting instruments
	//
	// NOTE(review): the counter address is computed by indexing past
	// prof_rec->back_edge[n_back_edge]; presumably the counters are laid out
	// after the back-edge table -- confirm against profiling.h
	//
	if(inner_statistics){
		assert(emitter.prof_rec) ;
		inner_bb_instrumenting_code(emitter,
			(unsigned*)&((PROF_COUNTER*)&((unsigned short*)&emitter.prof_rec->back_edge[emitter.prof_rec->n_back_edge])[emitter.prof_rec->n_back_edge])[emitter.inner_bb_cnt_offset++]);
	}
	// non-negative path: shift right by n_shift
	dst->emit_shift(emitter,shr_opc,shf);
	//
	// jmp _done    (2 bytes);
	// mov ecx,eax  (2 bytes);      sar eax, 5    (2 or 3 bytes)
	// and ecx,0x1f (3 or 6 bytes); test ecx, ecx (2 bytes)
	// jz  _done    (2 bytes);      add eax,1     (3 bytes)
	//
	emitter.emit_jump(0);
	// _negative starts here: patch the jl above with the distance from the
	// end of that branch to the current offset
	*neg_patch = emitter.get_offset() - neg_off;
	// remember where the jmp _done has to be patched
	int patch_off = emitter.get_offset();
	char *patch = emitter.get_next() - 1;
	//
	// For statistics, inserting instruments
	//
	if(inner_statistics){
		assert(emitter.prof_rec) ;
		inner_bb_instrumenting_code(emitter,
			(unsigned*)&((PROF_COUNTER*)&((unsigned short*)&emitter.prof_rec->back_edge[emitter.prof_rec->n_back_edge])[emitter.prof_rec->n_back_edge])[emitter.inner_bb_cnt_offset++]);
	}
	// negative path: keep a copy of the dividend, shift arithmetically, then
	// check whether any of the discarded low bits were set
	dst->emit_mov_to_reg(emitter,&reg->opnd);
	dst->emit_shift(emitter,sar_opc,shf);
	Imm_Operand(val - 1).emit_alu_inst(emitter,&reg->opnd,and_opc);
    emitter.emit_test(&reg->opnd,&reg->opnd);
	//
	// jz _done (2 bytes);  add eax, 1 ( 3 bytes)
	//
	emitter.emit_branch(cc_eq,0,0);//Use patch!
	int patch_off2 = emitter.get_offset();
	char *patch2 = emitter.get_next() - 1;
	//
	// For statistics, inserting instruments
	//
	if(inner_statistics){
		assert(emitter.prof_rec) ;
		inner_bb_instrumenting_code(emitter,
			(unsigned*)&((PROF_COUNTER*)&((unsigned short*)&emitter.prof_rec->back_edge[emitter.prof_rec->n_back_edge])[emitter.prof_rec->n_back_edge])[emitter.inner_bb_cnt_offset++]);
	}
	// the low bits were not zero: correct the rounding by adding 1
	Imm_Operand(1).emit_alu_inst(emitter,&dst_reg->opnd,add_opc);
	// _done is here: patch the jmp and the jz with their distances to the
	// current offset (the values are narrowed to a char when stored)
	*patch = emitter.get_offset() - patch_off;
	*patch2 = emitter.get_offset() - patch_off2;
	reg->free_opnd(&stack.reg_manager);
	stack.push(dst_reg);

	//
	// For statistics
	//
	// NOTE(review): b_inner_counter is declared elsewhere; presumably it
	// records that inner counters were emitted -- confirm
	//
	b_inner_counter = true ;
}
//
// Defined elsewhere.  It is declared here but not called in the visible
// part of this file.
//
extern Operand *emit_alu(Mem_Manager& mem_manager, Code_Emitter& emitter,
						  Stack& stack, X86_ALU_Opcode opc,
						  Operand *dst, Operand *src, unsigned is_commutable);
//
// Emit code for a 32-bit signed dividend/divisor.  The divisor is popped
// from the operand stack first, then the dividend.  The idiv instruction
// leaves the quotient in eax and the remainder in edx.
//
// get_quotient selects the result that is pushed back on the operand stack:
// non-zero pushes the quotient (eax), zero pushes the remainder (edx).
//
// An immediate divisor that is a positive power of two is handed over to
// gen_idiv_pow2 / gen_irem_pow2 and no idiv is emitted.
//
void gen_idiv(Mem_Manager& mem_manager,
			  Code_Emitter& emitter,Stack& stack,
			  Pre_Alloc_Operand_Pool& op_pool,
			  unsigned get_quotient) {
	Operand *divisor = stack.pop();
	//
	// stack operand for the depth left after popping the divisor; the
	// divisor is stored there if it has to be spilled
	//
	Stack_Operand *divisor_slot = op_pool.nth_stack(stack.depth());
	Operand *dividend = stack.pop();

	//
	// generate more efficient code if the divisor is a constant power of 2
	//
	if (divisor->kind == Operand::Imm) {
		const int val = ((Imm_Operand*)divisor)->imm_opnd.value;
		const int n_shift = (val > 0) ? is_power2(val) : -1;
		if (n_shift != -1) {
			if (get_quotient) {
				gen_idiv_pow2(mem_manager,emitter,stack,dividend,val,n_shift);
			} else {
				gen_irem_pow2(mem_manager,emitter,stack,dividend,val,n_shift);
			}
			return;
		}
	}

	Register_Manager *regs = &stack.reg_manager;
	//
	// idiv uses eax and edx, so a divisor that touches either register is
	// stored to its stack slot.  The same is done for a divisor that is
	// neither a register nor a memory operand.
	//
	if (divisor->contain(eax_reg) || divisor->contain(edx_reg) ||
		!(divisor->is_reg() || divisor->is_mem())) {
		gen_store32(emitter,stack,&divisor_slot->opnd,divisor);
		divisor = divisor_slot;
	}
	//
	// bring the dividend into eax unless it already is the scratch
	// register eax
	//
	if (!(dividend->is_scratch_reg() && dividend->contain(eax_reg))) {
		stack.spill_opnds_contain(eax_reg);        // free up eax
		dividend->free_opnd(regs);
		dividend->emit_mov_to_reg(emitter,&eax_opnd);
		dividend = regs->get_reg(eax_reg);         // hold eax
	}
	//
	// edx receives the remainder, so it must not hold a live operand
	//
	if (!regs->is_free(edx_reg)) {
		stack.spill_opnds_contain(edx_reg);
	}
	Reg_Operand *remainder = regs->get_reg(edx_reg);
	//
	// cdq followed by idiv
	//
	emitter.emit_cdq();
	divisor->emit_idiv_inst(emitter);
	divisor->free_opnd(regs);
	dividend->free_opnd(regs);
	//
	// push the requested result: remainder (edx) or quotient (eax)
	//
	if (!get_quotient) {
		stack.push(remainder);
		return;
	}
	stack.push(regs->get_reg(eax_reg));
	remainder->free_opnd(regs);
}

//
// Emit code for a floating-point remainder of src1 and src2.
//
//   mm, emitter, stack, op_pool -- code generation state handed on to the
//                                  helper routines
//   src1, src2                  -- the two operands, in the order the caller
//                                  popped them
//   is_dbl                      -- true for double, false for float
//
// When stack.fp_strict_mode is set, both operands are released and passed as
// memory operands to fp_remainder; if USE_FP_COMPATIBILITY holds, that call
// is bracketed by set_fpu_control_word / restore_old_fpu_control_word.
// Otherwise both operands are loaded onto the fp stack and
// fp_remainder_loop is used.
//
void fp_rem(Mem_Manager&            mm,
            Code_Emitter&           emitter,
            Stack&                  stack,
			Pre_Alloc_Operand_Pool& op_pool,
            Operand                 *src1,
            Operand                 *src2,
            bool                    is_dbl) {

    if (!stack.fp_strict_mode)
    {
        //
        // non-strict mode: compute the remainder on the fp stack
        //
        load_onto_fp_stack(stack, src1, is_dbl);
        load_onto_fp_stack(stack, src2, is_dbl);

        fp_remainder_loop(emitter,stack);
        emitter.emit_fstp(1);  // pop stk(1)
        stack.fp_dec_cnt();
        result_on_fp_stack(mm, stack, is_dbl);
        return;
    }

    //
    // strict mode: the operands are released and then used as memory
    // operands
    //
    src1->free_opnd(&stack.reg_manager);
    src2->free_opnd(&stack.reg_manager);
    Mem_Operand *mem1 = (Mem_Operand*)src1;
    Mem_Operand *mem2 = (Mem_Operand*)src2;

    if (USE_FP_COMPATIBILITY)
    {
        // the remainder is computed between setting and restoring the
        // FPU control word
        set_fpu_control_word(emitter, stack, op_pool, is_dbl);
        fp_remainder(emitter,stack, op_pool, mem1, mem2, is_dbl);
        restore_old_fpu_control_word(emitter, stack, op_pool, is_dbl);
        return;
    }
    fp_remainder(emitter,stack, op_pool, mem1, mem2, is_dbl);
}

//
// Emit code for a single-precision floating-point remainder.  The two
// operands are popped from the operand stack and handed to fp_rem in the
// order they were popped (the top of the stack first).
//
void gen_frem(Mem_Manager& mm,Code_Emitter& emitter,Stack& stack,
			  Pre_Alloc_Operand_Pool& op_pool) {
	// the pops are kept as separate statements so their order is fixed
	Operand *top = stack.pop();
	Operand *below = stack.pop();
	const bool is_dbl = false;
	fp_rem(mm, emitter, stack, op_pool, top, below, is_dbl);
}

//
// Emit code for a double-precision floating-point remainder.  The two
// 64-bit operands are popped from the operand stack and handed to fp_rem in
// the order they were popped (the top of the stack first).  Only the first
// operand returned by each pop64 is used; the second one is ignored.
//
void gen_drem(Mem_Manager& mm,Code_Emitter& emitter,Stack& stack,
			  Pre_Alloc_Operand_Pool& op_pool) {
	Operand *top = NULL;
	Operand *top_hi = NULL;       // not used after the pop
	stack.pop64(top,top_hi);
	Operand *below = NULL;
	Operand *below_hi = NULL;     // not used after the pop
	stack.pop64(below,below_hi);
	const bool is_dbl = true;
	fp_rem(mm, emitter, stack, op_pool, top, below, is_dbl);
}

