// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/code_gen.cpp,v 1.9 2001/12/27 03:24:44 xhshi Exp $
//

#include "defines.h"
#include <iostream.h>
#include "Mem_Manager.h"
#include "ir.h"
#include "stack.h"
#include "cg_prepass.h"
#include "expression.h"
#include "code_gen.h"
#include "flow_graph.h"
#include "dumpjit.h"
#include "mtable.h"
#include "global_reg_alloc.h"
#include "build_ir_routines.h"
#include "optimizer.h"
#include "opt_bound_elimination.h"
#include "local_cse.h"
#include "o3_profiling.h"
#include "handles.h"
#include "copy_prop.h"

#ifdef CAFFEINE_MARK
#include "loop_invariant_path.h"
#include "bytecode_opt.h"
#endif

#ifdef O3_VTune_Support

#include "jit_common.h"
#include "..\ia32_o1_jit\vtune.h"
#include "disass_str.h"
#include "dumpjit.h"

// One map slot that stores either an unsigned offset or a pointer to a
// Branch_Patch; both members share the same storage.
// NOTE(review): nothing in the visible part of this file uses Map_Entry.  The
// only trace is the commented-out `map[start_pc].offset` in the VTune
// line-number loop, so this presumably indexed by bytecode pc -- confirm
// before relying on it.
union Map_Entry {
	unsigned	  offset;   // used when the slot holds an offset
	Branch_Patch *patch;    // used when the slot holds a pending branch patch
};

//FILE* fp_vtune_debug = NULL;
#endif

// Filter strings handed to the O3_Method_Table constructors on the first call
// to O3_compile_method (paired there with the tags "METHODS", "INLINE" and
// "DOTFILES").  Nothing in this file assigns them.
// NOTE(review): O3_envvar_ARGS is not referenced in this part of the file.
const char *O3_envvar_METHODS = NULL;
const char *O3_envvar_INLINE  = NULL;
const char *O3_envvar_ARGS    = NULL;
const char *O3_envvar_DOTFILES= NULL;
// Built from O3_envvar_METHODS; O3_compile_method refuses to compile any
// method this table does not accept.
static O3_Method_Table *mtable = NULL;
// Built from O3_envvar_INLINE / O3_envvar_DOTFILES.  They are only created
// here; their consumers live elsewhere.
O3_Method_Table *O3_itable = NULL;
O3_Method_Table *O3_dottab = NULL;
// NOTE(review): these two flags are not read in this part of the file;
// presumably they control dot-file / dumpjit output -- confirm at the users.
bool one_dot_file = false;
bool extended_dumpjit = false;

//Class_Handle java_lang_obj = NULL;

// Refreshed for every compiled method from O3_comp->accept_this_method().
bool use_compensation;
// Filter string for the "COMP" table below.
const char *O3_envvar_COMP= NULL;
// Built from O3_envvar_COMP on the first call to O3_compile_method.
O3_Method_Table *O3_comp = NULL;

//
// Build the flow graph and the IR for one method.
//
// The caller_* arguments are forwarded to the Flow_Graph constructor;
// O3_compile_method passes NULL/0 for all of them.  A NULL caller_fg is
// treated here as "this method is not being inlined".
//
// Steps, in order: bytecode prepass, flow graph construction, loop
// transformation, IR construction, (after peeling) copy propagation and branch
// folding, bound checking elimination, inlining, unreachable-node pruning.
//
// fg is an OUT parameter: it receives the new flow graph, which is allocated
// from mem_manager.
//
// Returns the prepass status if the prepass fails, JIT_SUCCESS otherwise.
//
JIT_Result build_IR_of_method(
    Compile_Handle cmpl_handle, 
    Class_Handle   c_handle, 
    Method_Handle  m_handle,
    bool gc_requires_write_barriers,
    Mem_Manager&   mem_manager,
    Expressions&   exprs,
    Flow_Graph*&   fg,  // OUT
    Eh_Node        *caller_eh,
    Cfg_Node       *caller_subr,
    Flow_Graph *caller_fg,
    unsigned caller_bc_idx,
    const Byte     *bytecode_addr, 
    size_t         bytecode_size, 
    unsigned       max_stack,
    unsigned       max_locals,
    unsigned       accumulated_inlined_bc_size,
    unsigned&      global_inlined_bc_size) {

    //
    // bytecode prepass; give up with its status if it fails
    //
    CG_Prepass prepass(cmpl_handle, c_handle, m_handle, mem_manager,
                       bytecode_addr, bytecode_size, max_stack, max_locals);
    if (prepass.result != JIT_SUCCESS) {
        return prepass.result;
    }

#ifdef TRACE_O3xx
    dumpjit_bytecodes_only(cout,bytecode_addr,bytecode_size,m_handle,c_handle);
#endif // TRACE_O3xx

    //
    // java mimic stack used while the IR is built
    //
    Stack mimic_stack(mem_manager, max_stack);

    fg = new(mem_manager) Flow_Graph(bytecode_addr, bytecode_size, max_locals,
                                     &prepass, mem_manager, 0, max_stack,
                                     caller_eh, caller_subr,
                                     caller_fg, caller_bc_idx,
                                     cmpl_handle, m_handle, c_handle);

    //
    // Loop transformations (normalization, linearization, peeling, unrolling)
    // happen before any IR exists.  They run for a method that is not being
    // inlined, or for an inlined one whose handler list is empty (the list
    // head links back to itself).
    //
    bool loops_peeled = false;
    bool transform_loops = (caller_fg == NULL);
    if (!transform_loops) {
        // only inspected for inlined methods, as in the short-circuit form
        transform_loops = (fg->handlers()->next() == fg->handlers());
    }
    if (transform_loops) {
        loop_transformation(fg, loops_peeled);
    }

    //
    // build intermediate representation (IR)
    //
    exprs.set_live_lcse(NULL);
    //build_incoming_arg_assignment(c_handle,m_handle,mem_manager, exprs, fg->prolog());
    fg->Build_IR(cmpl_handle, exprs, mimic_stack, gc_requires_write_barriers);

    //
    // bound checking elimination; when loops were peeled, copy propagation
    // and branch folding run first
    //
    Compilation_Env comp_env(cmpl_handle, c_handle, m_handle,
                             bytecode_addr, bytecode_size, max_locals, max_stack);
    if (loops_peeled) {
        copy_prop(fg, exprs);
        fold_branch(fg);
    }
    bound_checking_elimination(comp_env, fg, exprs, gc_requires_write_barriers);

    //
    // inlining, then drop whatever became unreachable
    //
    inline_methods(cmpl_handle, mem_manager, exprs, fg,
                   accumulated_inlined_bc_size, global_inlined_bc_size,
                   gc_requires_write_barriers);
    fg->prune_unreachable();

    return JIT_SUCCESS;
}

//
// Give a home-location slot to every incoming argument operand that has no
// physical register (assigned_preg() == n_reg) and return how many slots were
// handed out, i.e. how many argument words live on the runtime stack.
//
// Slots are numbered from 0 in argument order; for a non-static method the
// receiver comes first.
//
static unsigned n_inargs_on_stack(Expressions& exprs, Method_Handle m_handle) {
    unsigned arg_pos = 0;   // index given to lookup_arg_exp; 64-bit args use two
    int next_slot = 0;      // next home-location slot to hand out

    //
    // the receiver ("this") of a non-static method is argument 0
    //
    if (!method_is_static(m_handle)) {
        Operand_Exp *this_exp = exprs.lookup_arg_exp(arg_pos, JIT_TYPE_CLASS);
        if (this_exp->opnd->assigned_preg() == n_reg) {
            Arg_Operand *this_opnd = (Arg_Operand*)this_exp->opnd;
            this_opnd->set_home_location(next_slot++);
            this_opnd->set_use_arg_home_loc();
        }
        arg_pos++;
    }

    //
    // declared arguments, in signature order
    //
    Java_Type type;
    for (Arg_List_Iterator iter = method_get_argument_list(m_handle);
         (type = curr_arg(iter)) != JAVA_TYPE_END;
         iter = advance_arg_iterator(iter)) {
        CONVERT_TO_INT_JAVA(type);
        Operand_Exp *arg_exp = exprs.lookup_arg_exp(arg_pos, (O3_Jit_Type)type);
        if (arg_exp->opnd->assigned_preg() == n_reg) {
            Arg_Operand *lo_opnd = (Arg_Operand*)arg_exp->opnd;
            lo_opnd->set_home_location(next_slot++);
            lo_opnd->set_use_arg_home_loc();
        }

        if (!IS_64BIT(type)) {
            arg_pos += 1;
            continue;
        }

        //
        // double value is always pushed (check off > 0)
        // long value's hi-32 is pushed if we run out of regs (check ith +1)
        //
        assert(arg_exp->opnd->hi_opnd() != NULL);
        if (arg_exp->opnd->hi_opnd()->assigned_preg() == n_reg) {
            Arg_Operand *hi_opnd = (Arg_Operand*)arg_exp->opnd->hi_opnd();
            hi_opnd->set_home_location(next_slot++);
            hi_opnd->set_use_arg_home_loc();
        }
        else {
            // NOTE(review): when the high half has a register, the LOW operand
            // is given the next slot without set_use_arg_home_loc(); the reason
            // is not visible here -- confirm the intent before changing it.
            ((Arg_Operand*)arg_exp->opnd)->set_home_location(next_slot++);
        }
        arg_pos += 2;
    }
    return next_slot;
}

// Set once, on the first call to O3_compile_method, to whether the CPUID
// feature flags have both bits of mask 0x8001 set.  Per Intel's CPUID
// documentation those are EDX bit 0 (FPU) and bit 15 (CMOV).
bool O3_is_PPro = false;


//
// Hand-assembled IA-32 routine, called through a function pointer, that
// returns the EDX feature flags of CPUID leaf 1.
//
// CPUID overwrites EAX, EBX, ECX and EDX.  EBX is a callee-saved register in
// the IA-32 C calling conventions, so the routine saves and restores it;
// without that, a caller keeping a live value in EBX would be corrupted.
// The 12 code bytes fit in the 16-byte array; the rest is zero-filled and is
// never reached because of the ret.
//
static char get_cpuid_code_bytes[16] =
{ 
    '\x53',                                 // push ebx  (cpuid clobbers it)
    '\xB8', '\x01', '\x00', '\x00', '\x00', // mov eax, 1
    '\x0F', '\xA2',                         // cpuid
    '\x8B', '\xC2',                         // mov eax, edx
    '\x5B',                                 // pop ebx
    '\xC3'                                  // ret
};

//
// Run the machine code stored in get_cpuid_code_bytes as a function taking no
// arguments and return whatever it leaves in the return register (by the
// array's own annotations, EDX of CPUID leaf 1).
// NOTE(review): this jumps into a data array, so it only works where that
// memory is executable -- confirm for the target platform.
//
static unsigned get_cpuid_feature_flags()
{
    typedef unsigned (*Cpuid_Stub)(void);
    Cpuid_Stub stub = (Cpuid_Stub)(void *)get_cpuid_code_bytes;
    return stub();
}


//
// Entry point from the VM: compile one method with the O3 JIT.
//
// Pipeline: one-time setup on the first call, method filtering, IR
// construction (build_IR_of_method), optional synchronization and lazy
// exception rewrites, global optimization, linearization, local and global
// register allocation, clean-up, profiling instrumentation, home location
// assignment, optional peephole pass, code emission and, when
// O3_VTune_Support is defined, a method-load notification to VTune.
//
// Parameters:
//   compilation_handle, class_handle, method_handle - VM handles forwarded to
//       the individual phases.
//   bytecode_addr, bytecode_size - the method's bytecode.
//   max_locals, max_stack        - frame limits of the method.
//   use_fast_call                - not used in this function.
//   gc_requires_write_barriers   - forwarded to IR construction and the
//       optimizer.
//
// Returns JIT_FAILURE when the METHODS table rejects the method, the failing
// status of build_IR_of_method when IR construction fails, and JIT_SUCCESS
// otherwise.
//
JIT_Result	O3_compile_method(
	Compile_Handle   compilation_handle,
    Class_Handle  class_handle,
    Method_Handle method_handle,
    const Byte *  bytecode_addr,
    size_t        bytecode_size,

    unsigned      max_locals,
    unsigned      max_stack,
    unsigned      use_fast_call,
    bool          gc_requires_write_barriers) {

    //
    // One-time setup: handles, the method filter tables and the CPU check.
    // NOTE(review): the first-call test is a plain static counter; whether
    // concurrent first calls can happen depends on the VM -- confirm.
    //
    static unsigned call_count=0;
    if (call_count++==0)
    {
        init_handles(class_handle);
        mtable    = new O3_Method_Table(O3_envvar_METHODS, "METHODS", true);
        O3_itable = new O3_Method_Table(O3_envvar_INLINE, "INLINE", true);
        O3_dottab = new O3_Method_Table(O3_envvar_DOTFILES, "DOTFILES", true);
        O3_comp   = new O3_Method_Table(O3_envvar_COMP,     "COMP", true);

        O3_is_PPro = ((get_cpuid_feature_flags() & 0x8001) == 0x8001);
    }

    // 
    // create memory manager
    //
    unsigned size_estimate = 250 * bytecode_size;  // 250 is based on empirical evidence.
    Mem_Manager mem_manager(size_estimate);

    // Decide whether to JIT this method.  The test is done only at
    // the entry point from the VM, and not as part of the inlining
    // decision.
    if (!mtable->accept_this_method(method_handle))
        return JIT_FAILURE;
    use_compensation = O3_comp->accept_this_method(method_handle);

    const char *method_name = method_get_name(method_handle);
    const char *class_name = class_get_name(class_handle);
#ifdef TRACE_O3
    cout << "O3 compiling: " << class_name << "." << method_name <<
        method_get_descriptor(method_handle) << endl;
#endif // TRACE_O3

    //
    // create reg mapping (preparation for building IR)
    // 
    RegID_Map reg_map(mem_manager, max_locals, max_stack, n_reg);
    //
    // create kill id mapping (preparation for building expressions)
    //
    Kill_ID_Map kill_id_map;
    //
    // Memory manager for local CSE
    //
    Mem_Manager lcse_mm(bytecode_size*sizeof(LCSE));
    LCSE_Pool lcse_pool(lcse_mm);
    //
    // create an expression pool
    //
    Expressions expressions(mem_manager,reg_map,kill_id_map,lcse_pool);  // reg id starts from 0
    //
    // Flow Graph of the method
    //
    Flow_Graph *fg;
#ifdef CAFFEINE_MARK
    // before entering prepass, do some byte code optimization
    bytecode_optimization((Byte *)bytecode_addr, bytecode_size, max_stack, max_locals);
#endif
    //
    // build IR for the method
    //
    unsigned inlined_bc_size = 0;
    unsigned global_inlined_bc_size = 0;
    JIT_Result result = 
    build_IR_of_method(compilation_handle, class_handle, method_handle, gc_requires_write_barriers,
                       mem_manager, expressions, fg, NULL, NULL, NULL, 0,
                       bytecode_addr, bytecode_size, max_stack, max_locals,
                       inlined_bc_size, global_inlined_bc_size);
    if (result != JIT_SUCCESS)
        return result;

#ifdef TRACE_O3
    fg->print_cfg("begin");
#endif // TRACE_O3

#ifdef STAT_INDIRECT_CALL
    extern void instrument_indr_call(Cfg_Node *node, Closure *c);
    Instrument_Closure ins_closure2(expressions,fg) ;
    fg->apply(instrument_indr_call,&ins_closure2) ;
#endif

#ifdef O3_VTune_Support1
    O3_VTune_Closure c( expressions, fg);
//    fg->apply(insert_vtune_call_code,&c);
#endif

    //
    // generate monitor enter/exit call
    //
    if (method_is_synchronized(method_handle)) {
        Inst *inst = fg->prolog()->IR_instruction_list();
        gen_synch_method_enter(expressions,class_handle,method_handle,inst);
        inst = fg->epilog()->IR_instruction_list()->prev();
        gen_synch_method_exit(expressions,class_handle,method_handle,inst);
    }

    extern bool O3_lazy_exc;
    void optimize_throws(Flow_Graph *fg, Expressions &exprs);
    if (O3_lazy_exc)
        optimize_throws(fg, expressions);

    //
    // perform global optimization
    //
    Compilation_Env comp_env(compilation_handle, class_handle, method_handle,
                             bytecode_addr,bytecode_size,max_locals,max_stack);
    Optimizer optimizer(comp_env, fg, expressions, gc_requires_write_barriers);
    optimizer.global_optimization();

#ifdef CAFFEINE_MARK
#ifdef TRACE_O3
    fg->print_cfg("0e");
#endif // TRACE_O3
    // optimization for Caffeine mark
    bool loopopt_success = transform_loop_invariant_path(fg, expressions);
#endif
    //
    // The default ordering is the same as the ordering in the bytecode.
    // If some transformations (e.g. bound checking elimination) have modified 
    // the flow graph and the original linearization ordering is no longer a 
    // good one, then we run linearize() to determine the ordering.  
    // The leading `true` currently forces linearization unconditionally.
    //
    if (true || fg->need_linearization())
        fg->linearize();
    //
    // local register allocation
    //
#ifdef TRACE_O3
    fg->print_cfg("1");
#endif // TRACE_O3

    //
    // For inner basic block instrumentation, allocate the counters first.
    //
    if(Inner_O3_statistics){
        assert(fg->inner_counter==NULL) ;
        assert(fg->inner_counter_num==0) ;
        // Room for O3_MIN_INNER_BRANCH_SIZE*2 counters, zero-filled.
        // The original author warns that this may not be enough.
        fg->inner_counter = (unsigned*)method_allocate_jit_data_block(fg->m_handle(),fg->cmpl_handle(),sizeof(unsigned)*2*O3_MIN_INNER_BRANCH_SIZE);
        memset(fg->inner_counter,0,sizeof(unsigned)*2*O3_MIN_INNER_BRANCH_SIZE) ;
    }

    unsigned scratch_regs_used = local_reg_allocation(fg, expressions);

#ifdef TRACE_O3
    fg->print_cfg("2");
#endif // TRACE_O3

    unsigned args_on_stack = n_inargs_on_stack(expressions,method_handle);
    //
    // global register allocation
    // Dead code elimination is done first, just to help avoid saving/restoring
    // registers for variables that are killed.
    //
    fg->dead_code_eliminate(expressions, false);

#ifdef TRACE_O3
    fg->print_cfg("2a");
#endif // TRACE_O3

    global_reg_allocation(fg, expressions, scratch_regs_used);
#ifdef TRACE_O3
    fg->print_cfg("2b");
#endif // TRACE_O3
    //
    // After dead code elimination, some blocks may become empty
    // 
    fg->remove_empty_blocks();
    fg->prune_unreachable();
#ifdef TRACE_O3
    fg->print_cfg("3");
#endif // TRACE_O3
    //
    // inserting profiling code
    //
    insert_profiling_code(fg, expressions);

    //
    // emit code
    //
#ifdef TRACE_O3
    fg->print_cfg("44");
#endif // TRACE_O3

    fg->dead_code_eliminate(expressions, true);
#ifdef TRACE_O3
    fg->print_cfg("44-1");
#endif // TRACE_O3
    fg->home_location_assignment();
#ifdef TRACE_O3
    fg->print_cfg("44-2");
#endif // TRACE_O3
    void peephole_opt(Flow_Graph *fg);
    extern bool O3_peephole;
    if (O3_peephole)
        peephole_opt(fg);
#ifdef TRACE_O3
    fg->print_cfg("44-3");
#endif // TRACE_O3


#ifdef CAFFEINE_MARK
    code_emission(compilation_handle, class_handle, method_handle, 
                  expressions, fg, bytecode_size,args_on_stack, loopopt_success);
#else
    code_emission(compilation_handle, class_handle, method_handle, 
                  expressions, fg, bytecode_size,args_on_stack);
#endif

#ifdef TRACE_O3
    fg->print_cfg("4");
#endif // TRACE_O3
#if 0
    cout << class_get_name(class_handle) << "." << method_get_name(method_handle)
         << ": total allocation: " << mem_manager.bytes_allocated() << " bytes, "
         << bytecode_size << " bytecodes" << endl << "    ratio=" <<
         ((double)mem_manager.bytes_allocated()/bytecode_size) << endl;
#endif

#ifdef PRINTABLE_O3
    if (false && O3_statistics) 
    {
        dump_control_flow(fg);
    }
#endif
#ifdef TRACE_O3
    cout << "O3 finished compiling: " << method_name << endl << endl;
#endif // TRACE_O3

#ifdef O3_VTune_Support
    //
    // Tell VTune about the method that was just emitted.
    //
    // The record is built and sent only when the method really has a code
    // block and the record could be allocated, so VTune never sees
    // uninitialised memory.  The record and the line table are zero-filled
    // before use; fields not assigned below (including every line's Offset,
    // for which no pc-to-offset map is available here) are therefore 0.
    //
    // NOTE(review): the record, the duplicated source file name and the line
    // table are deliberately not freed after the notification, because it is
    // not visible here whether VTune keeps the pointers -- confirm against the
    // VTune JIT API before adding free() calls.
    //
    unsigned char* code_block = method_get_code_block_addr(method_handle);
    unsigned code_size =  method_get_code_block_size(method_handle);
    if (code_block != NULL) {
        iJIT_Method_Load *mInfo =
            (iJIT_Method_Load*)malloc(sizeof(iJIT_Method_Load));
        if (mInfo != NULL) {
            memset(mInfo, 0, sizeof(iJIT_Method_Load));

            char buf[512], *source_filename = NULL;
            int lineNo=0;  lineInfo_t *lineInfo=NULL;

            if (java_sourcefile(buf,sizeof(buf),(char*)class_name, (char*)method_name,bytecode_size,&lineNo,&lineInfo)) {
                source_filename = strdup(buf);
            }

            mInfo->method_id = (unsigned long)method_handle;
            mInfo->method_name = (char*)method_name;
            mInfo->method_load_address = (unsigned long)code_block;
            mInfo->method_size = code_size;
            mInfo->class_id = (unsigned long)class_handle;
            mInfo->class_file_name = (char*)class_name;
            mInfo->source_file_name = source_filename;

            //
            // Copy the line numbers.  If there is nothing to copy, or the
            // table cannot be allocated, report an empty table instead of a
            // count without storage behind it.
            //
            LineNumberInfo *lines = NULL;
            if (lineNo > 0 && lineInfo != NULL) {
                lines = (LineNumberInfo *)malloc(sizeof(LineNumberInfo) * lineNo);
                if (lines != NULL)
                    memset(lines, 0, sizeof(LineNumberInfo) * lineNo);
            }
            mInfo->line_number_size = (lines != NULL) ? lineNo : 0;
            mInfo->line_number_table = lines;
            if (lines != NULL) {
                for (int i = 0; i < lineNo; i++) {
                    lines[i].LineNumber = lineInfo[i].line_number;
//                  lines[i].Offset = map[lineInfo[i].start_pc].offset;
                }
            }
            free(lineInfo);

            iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, (void *)mInfo);
        }
    }
#endif // VTune_Support

    return JIT_SUCCESS;
}

