// Copyright (C)  2000 Intel Corporation.  All rights reserved.
//
// $Header: /usr/development/orp/orp/arch/ia32/ia32_o3_jit/priority_reg_alloc.cpp,v 1.2 2001/08/13 09:52:49 xhshi Exp $
//



// For each variable that is ever referenced, keep a bit vector of
// which basic blocks it is referenced in or is live in.  This includes
// physical registers, and remember that scratch registers are
// killed by call instructions and exceptions.  We also keep a static
// reference count for all variables (but not physical registers).
//
// For each basic block, keep an 8-bit mask indicating which registers
// are still free in the basic block.  Then for each variable, sorted
// by static reference count, find a register that is free in all basic
// blocks in which the variable is live or referenced.  Assign the
// register to the variable, and then mark the register as unavailable
// in all those basic blocks.

#include "defines.h"
#include "ir.h"
#include "expression.h"
#include "flow_graph.h"
#include "bit_vector.h"
#include "bit_vector_group.h"

// Declared here, defined elsewhere.  Within this file the only reference is
// inside a disabled (#if 0) block in priority_reg_alloc(), which compares the
// result against -1.
int is_power2(int v);

// Per-variable reference record.  priority_reg_alloc() allocates one entry per
// variable (zero-filled), count_ref_opnd() fills them in indexed by
// bv_position(), and the sort routines later compact and reorder the array.
struct ref_type_count
{
    int cnt;                // static reference count, incremented by count_ref_opnd()
    GCTrack_Operand *opnd;  // operand recorded for this variable; zero until first counted
};

// Closure handed to count_all_refs() via Flow_Graph::apply().  It bundles the
// per-variable and per-basic-block tables allocated by priority_reg_alloc()
// so that the node visitor can fill them in.  The closure owns nothing; it
// only stores the pointers and sizes it is given.
class Priority_Closure : public Closure
{
public:
    // Stores the supplied table pointers and sizes.  The initializer list is
    // written in member declaration order, which is the order in which C++
    // actually initializes the members.
    Priority_Closure(unsigned num_bb, unsigned num_vars, Bit_Vector **uses,
        unsigned char *free_regs, unsigned char *free_fp_slots, ref_type_count *refcount,
        Mem_Manager& mem, unsigned *nodearray):
        local_mem(mem), num_bb(num_bb), num_vars(num_vars), uses(uses),
        free_regs(free_regs), free_fp_slots(free_fp_slots), refcount(refcount),
        nodearray(nodearray) {}
    Mem_Manager & local_mem; // allocator used by add_use() when creating Bit_Vectors
    unsigned num_bb;
    unsigned num_vars;
    Bit_Vector **uses; // size is num_vars; each entry NULL until first use
    unsigned char *free_regs; // size is num_bb; priority_reg_alloc() seeds each entry with ALL_X86_CALLEE_REGS
    unsigned char *free_fp_slots; // size is num_bb, or NULL when the flow graph has no FP; seeded with O3_num_fp_globals
    ref_type_count *refcount; // size is num_vars
    unsigned *nodearray; // scratch index buffer; priority_reg_alloc() sizes it max(num_bb, num_vars)
};

// Closure handed to update_liveness() via Flow_Graph::apply().  It carries the
// list of variables that priority_reg_alloc() assigned a register to.
class Liveness_Update : public Closure
{
public:
    // Stores the array pointer and element count; nothing is copied.
    Liveness_Update(GCTrack_Operand **a, unsigned s) :
      array(a), size(s) {}
    GCTrack_Operand **array; // the register-allocated variables
    unsigned size;           // number of valid entries in array
};

// Records that variable `varno` is referenced or live in basic block `node`.
// A physical register (varno < n_reg) is removed from the block's free-register
// mask.  Any other variable gets the block's bit set in its use vector, which
// is created on first use.
static void add_use(Priority_Closure *pc, unsigned varno, Cfg_Node *node)
{
    const unsigned bb = node->label;

    if (varno < n_reg)
    {
        pc->free_regs[bb] &= ~(1u << varno);
        return;
    }

    Bit_Vector *&blocks = pc->uses[varno];
    if (blocks == NULL)
        blocks = new(pc->local_mem) Bit_Vector(pc->num_bb, pc->local_mem, false);
    blocks->set(bb);
}

// Accounts for one operand of an instruction in basic block `node`.
//   opnd        - operand to examine; NULL is ignored.
//   pc          - tables being filled in.
//   is_def      - passed by callers but not consulted here.
//   cold        - when true, the reference is not added to the static count.
//   add_the_use - when true, the variable is also marked as used in `node`.
// A GCTrack operand is counted directly.  For any other operand kind the
// base and index sub-operands are visited instead, always as uses.
static void count_ref_opnd(Operand *opnd, Priority_Closure *pc,
                           bool is_def, bool cold, Cfg_Node *node,
                           bool add_the_use)
{
    if (opnd == NULL)
        return;

    if (opnd->kind != Operand::GCTrack)
    {
        count_ref_opnd(opnd->base(),  pc, false, cold, node, true);
        count_ref_opnd(opnd->index(), pc, false, cold, node, true);
        return;
    }

    const unsigned varno = opnd->bv_position();

    // Don't count the reference if it is in a cold node.
    if (!cold)
    {
        ref_type_count &entry = pc->refcount[varno];
        entry.cnt ++;
#if 0
        entry.cnt += 4 * node->loop_depth();
#endif // 0
        // Apart from physical registers, a slot must always see the same operand.
        assert(varno < n_reg || entry.opnd == NULL || entry.opnd == opnd);
        entry.opnd = (GCTrack_Operand *)opnd;
    }

    if (add_the_use)
        add_use(pc, varno, node);
}

// Marks every variable in the node's live set as used within the node.
// The live variable numbers are written into the closure's scratch array
// and then fed to add_use() one at a time.
static void add_live_uses(Cfg_Node *node, Priority_Closure *pc)
{
    const unsigned n_live = node->live->num_live();
    unsigned *live_vars = pc->nodearray;

    node->live->fill_in_live_array(live_vars);

    for (unsigned k = 0; k < n_live; k++)
        add_use(pc, live_vars[k], node);
}

// Per-node visitor (applied through Flow_Graph::apply) that fills in the
// Priority_Closure tables for one basic block:
//   - caller-saved registers are removed from the block's free mask when the
//     block has an incoming exception-handler edge or contains a call;
//   - every live-in variable is marked as used in the block;
//   - every destination and source operand is reference-counted and marked.
static void count_all_refs(Cfg_Node *node, Closure *c)
{
    Priority_Closure *closure = (Priority_Closure *) c;

    // is_cold() is still queried, but its answer is overridden so that every
    // node is treated as hot for reference counting.
    bool cold = node->is_cold();
    cold = false;

    if (node->eh_in_edge() != NULL)
        closure->free_regs[node->label] &= ~ALL_X86_CALLER_REGS;

    add_live_uses(node, closure);

    // The instruction list is walked from the element after the list head
    // until the head is reached again.
    Inst *sentinel = node->IR_instruction_list();
    for (Inst *cur = sentinel->next(); cur != sentinel; cur = cur->next())
    {
        if (cur->is_call())
        {
            closure->free_regs[node->label] &= ~ALL_X86_CALLER_REGS;
            if (closure->free_fp_slots != NULL && !node->is_cold_non_inlined())
                closure->free_fp_slots[node->label] = 0;
        }

        Operand *dst = cur->dst();
        bool add_the_use = true;
        // Disabled heuristic (note the leading `false`): if it's the last
        // instruction, and it's an assignment to a reg_operand, and the
        // operand was never marked in this basic block, don't mark it here.
        if (false && dst != NULL && dst->is_reg())
        {
            if (cur->next() == sentinel)
            {
                Bit_Vector *seen = closure->uses[dst->bv_position()];
                if (!IS_FP_DBL_TYPE(dst->type) &&
                    (seen == NULL || !seen->is_set(node->label)))
                    add_the_use = false;
            }
        }
        count_ref_opnd(dst, closure, true, cold, node, add_the_use);

        for (unsigned s = 0; s < cur->n_srcs; s++)
            count_ref_opnd(cur->src(s), closure, false, cold, node, true);
    }
}

// Per-node visitor (applied through Flow_Graph::apply).  For each
// register-allocated variable in the Liveness_Update closure that is live in
// this node by its id, also marks its current bv_position() as live.  That
// position is asserted to be a physical register number.
static void update_liveness(Cfg_Node *node, Closure *cl)
{
    Liveness_Update *lu = (Liveness_Update *)cl;
    GCTrack_Operand **cur = lu->array;
    GCTrack_Operand **const end = cur + lu->size;

    for (; cur != end; ++cur)
    {
        GCTrack_Operand *var = *cur;
        unsigned preg = var->bv_position();
        assert(preg < n_reg);
        if (!node->live->is_live_var(var->id))
            continue;
        node->live->mark_live(preg, var->type);
        // XXX- maybe we should mark the old value as dead.
    }
}

static int compare_refcount(const void *elem1, const void *elem2)
{
    ref_type_count *e1 = (ref_type_count *) elem1;
    ref_type_count *e2 = (ref_type_count *) elem2;
    int c1 = e1->cnt;
    int c2 = e2->cnt;
    if (c1 > 0 && c1 == c2)
        return e1->opnd->bv_position() - e2->opnd->bv_position();
    return c2 - c1;  // Cause it to sort in decreasing order.
}

// Selects the non-FP candidates for global register allocation and sorts them.
//   refcount - array of num_vars entries; compacted and sorted in place.
//   num_vars - in: number of entries; out: number of candidates kept.
// An entry is kept when it has a positive count, a non-NULL operand that
// reports global_reg_alloc_cand(), and a type that is not IS_FP_DBL_TYPE.
// The kept entries are moved to the front and ordered by compare_refcount().
static void sort_refcounts(ref_type_count *refcount, unsigned &num_vars)
{
    ref_type_count *out = refcount;
    ref_type_count *const end = refcount + num_vars;

    for (ref_type_count *in = refcount; in != end; ++in)
    {
        if (in->cnt <= 0)
            continue;
        if (in->opnd == NULL)
            continue;
        if (!in->opnd->global_reg_alloc_cand())
            continue;
        if (IS_FP_DBL_TYPE(in->opnd->type))
            continue;

        if (out != in)
            *out = *in;
        ++out;
    }

    const unsigned kept = (unsigned)(out - refcount);
    qsort(refcount, kept, sizeof(ref_type_count), compare_refcount);
    num_vars = kept;
}

// Selects the FP candidates for global FP slot allocation and sorts them.
//   refcount - array of num_vars entries; compacted and sorted in place.
//   num_vars - in: number of entries; out: number of candidates kept.
// An entry is kept when it has a positive count, a non-NULL operand that
// reports is_global_fp_cand(), and a type for which IS_FP_DBL_TYPE holds.
// The kept entries are moved to the front and ordered by compare_refcount().
static void sort_fp_refcounts(ref_type_count *refcount, unsigned &num_vars)
{
    unsigned kept = 0;

    for (unsigned src = 0; src < num_vars; src++)
    {
        const bool is_candidate =
            refcount[src].cnt > 0 &&
            refcount[src].opnd != NULL &&
            refcount[src].opnd->is_global_fp_cand() &&
            IS_FP_DBL_TYPE(refcount[src].opnd->type);
        if (!is_candidate)
            continue;

        if (src != kept)
            refcount[kept] = refcount[src];
        kept ++;
    }

    qsort(refcount, kept, sizeof(ref_type_count), compare_refcount);
    num_vars = kept;
}

// Priority-based global register allocation over a flow graph.
//
// Steps, in order:
//   1. Relabel the basic blocks and allocate the per-variable and per-block
//      tables from a local Mem_Manager.
//   2. Visit every node with count_all_refs() to collect reference counts,
//      per-variable block-use vectors and per-block free-register masks.
//   3. Sort the integer candidates by reference count and give each one the
//      lowest-numbered register that is free in every block it is used in.
//   4. If the graph has FP, do the same for FP candidates using a per-block
//      count of free FP slots.
//   5. Update liveness for the variables that received a register.
//   6. For synchronized methods, call keep_return_values_in_regs().
//
// Returns a bitmask of the callee-saved registers used.
unsigned priority_reg_alloc(Flow_Graph *fg, Expressions &exprs)
{
    extern unsigned O3_num_fp_globals;
    unsigned result = 0;  // bitmask of every register handed out so far
    Mem_Manager mem(1000);
    unsigned num_bb = fg->reassign_label();
    unsigned num_vars = exprs.reg_map.curr_tmp_reg_id();
    // uses[v]: bit vector of blocks in which variable v is used; created lazily by add_use().
    Bit_Vector **uses = (Bit_Vector **) mem.alloc(num_vars * sizeof(*uses));
    memset(uses, 0, num_vars * sizeof(*uses));
    // free_regs[b]: registers still free in block b.  Only the callee-saved
    // set is offered initially; the caller-saved set and edx are commented out.
    unsigned char *free_regs = (unsigned char *) mem.alloc(num_bb);
    memset(free_regs, /*ALL_X86_CALLER_REGS|*//*(1u<<edx_reg)|*/ALL_X86_CALLEE_REGS, num_bb);
    unsigned char *free_fp_slots = NULL;
    if (fg->has_fp)
    {
        // free_fp_slots[b]: number of FP slots still free in block b.
        // memset stores the value converted to unsigned char, so this relies
        // on O3_num_fp_globals fitting in one byte.
        free_fp_slots = (unsigned char *) mem.alloc(num_bb);
        memset(free_fp_slots, O3_num_fp_globals, num_bb);
    }
    ref_type_count *refcount = (ref_type_count *) mem.alloc(num_vars * sizeof(*refcount));
    memset(refcount, 0, num_vars * sizeof(*refcount));
    // nodearray is shared scratch space: add_live_uses() fills it with variable
    // numbers, the loops below fill it with block indices, hence the max.
    unsigned max = (num_bb > num_vars ? num_bb : num_vars);
    unsigned *nodearray = (unsigned *) mem.alloc(max * sizeof(*nodearray));
    Priority_Closure pc(num_bb, num_vars, uses, free_regs, free_fp_slots, refcount, mem,
                        nodearray);
    fg->apply(count_all_refs, &pc);
    GCTrack_Operand **vararray;  // the variables that were register-allocated
    unsigned n_reg_allocated = 0;
    vararray = (GCTrack_Operand **) mem.alloc(num_vars * sizeof(*vararray));
    ref_type_count *fp_refcount = NULL;
    unsigned num_fp_vars_used = 0;
    if (fg->has_fp)
    {
        // Copy the counts before sort_refcounts() compacts refcount in place,
        // then keep only the FP candidates in the copy.
        fp_refcount = (ref_type_count *) mem.alloc(num_vars * sizeof(*fp_refcount));
        num_fp_vars_used = num_vars;
        memcpy(fp_refcount, refcount, num_vars * sizeof(*fp_refcount));
        sort_fp_refcounts(fp_refcount, num_fp_vars_used);
    }
    unsigned num_vars_used = num_vars;
    sort_refcounts(refcount, num_vars_used);

    // Integer candidates, in the order produced by sort_refcounts().
    unsigned cur_index;
    for (cur_index=0; cur_index<num_vars_used; cur_index++)
    {
        Operand *opnd = refcount[cur_index].opnd;
        GCTrack_Operand *vopnd = (GCTrack_Operand *)opnd;
        unsigned varno = vopnd->bv_position();
        //cout << "candidate=" << candidate << ", refcount=" << refcount[candidate].cnt << endl;
        unsigned mask = ALL_X86_CALLER_REGS|ALL_X86_CALLEE_REGS;
        int oldrefcount = refcount[cur_index].cnt;
        // A candidate with at most two references may only take a register
        // that has already been handed out, or a caller-saved one.
        if (oldrefcount <= 2)
            mask = result | ALL_X86_CALLER_REGS;
        Bit_Vector *bv = uses[varno];
        assert(bv != NULL);
        unsigned bvsize = bv->bits_set();
        //if (bvsize <= 2) continue;  // Force it to have a reasonable large live range.
        bv->fill_in_index_array(nodearray);
        // Intersect with the free mask of every block the variable is used in.
        unsigned i;
        for (i=0; mask && i<bvsize; i++)
            mask &= free_regs[nodearray[i]];
        if (!mask) continue;
#if 0
        if ((mask & ALL_X86_CALLER_REGS) && is_power2(mask) != -1)
            continue;
#endif // 0
        // Pick the lowest-numbered register remaining in the mask.
        unsigned reg;
        for (reg=0; reg<n_reg; reg++)
        {
            if (mask & (1u << reg))
                break;
        }
        // Make the register unavailable in all of those blocks.
        for (i=0; i<bvsize; i++)
            free_regs[nodearray[i]] &= ~(1u << reg);
        vararray[n_reg_allocated++] = vopnd;
        assert(vopnd->assigned_preg() == n_reg);
        vopnd->set_assigned_preg((X86_Reg_No)reg);
        result |= (1u << reg);
#ifdef TRACE_O3xxx
        cout << "priority_reg_alloc: assigning " << X86_Reg_Str[reg] << " to variable " << varno
            << " (" << oldrefcount << " references)" << endl;
#endif // TRACE_O3xxx
    }

    // Now try to do some FP register allocation.
    if (fg->has_fp)
    {
        unsigned cur_index;
        for (cur_index=0; cur_index<num_fp_vars_used; cur_index++)
        {
            Operand *opnd = fp_refcount[cur_index].opnd;
            assert(opnd->is_reg());
            Reg_Operand *ropnd = (Reg_Operand *)opnd;
            unsigned varno = ropnd->bv_position();
            //cout << "candidate=" << candidate << ", refcount=" << fp_refcount[candidate].cnt << endl;
            // NOTE(review): mask is not read anywhere in this loop, and
            // oldrefcount is only read by the TRACE_O3 output below.
            unsigned mask = ALL_X86_CALLER_REGS|ALL_X86_CALLEE_REGS;
            int oldrefcount = fp_refcount[cur_index].cnt;
            Bit_Vector *bv = uses[varno];
            assert(bv != NULL);
            unsigned bvsize = bv->bits_set();
            //if (bvsize <= 1) continue;  // Force it to have a reasonable large live range.
            bv->fill_in_index_array(nodearray);
            // Find out whether there is at least one free slot in every BB.
            unsigned i;
            for (i=0; i<bvsize; i++)
            {
                if (free_fp_slots[nodearray[i]] == 0)
                    break;
            }
            // The scan stopped early, so some block has no free slot: skip this candidate.
            if (i < bvsize)
                continue;
            // Decrement the free slot count for each BB.
            for (i=0; i<bvsize; i++)
                free_fp_slots[nodearray[i]] --;
            assert(!ropnd->is_globally_allocated_fp());
            ropnd->set_globally_allocated_fp();
#ifdef TRACE_O3
            cout << "priority_reg_alloc: assigning global FP slot to variable " << varno
                << " (" << oldrefcount << " references)" << endl;
#endif // TRACE_O3
        }
    }

    // Propagate the new register assignments into every node's liveness info.
    if (n_reg_allocated > 0)
    {
        Liveness_Update lu(vararray, n_reg_allocated);
        fg->apply(update_liveness, &lu);
    }
    // Returns a bit mask representing the callee-saved registers that were assigned.
    result = result & ALL_X86_CALLEE_REGS;
    //
    // keep return values in callee-save registers to avoid spilling across
    // monitorexit calls
    //
    void keep_return_values_in_regs(Cfg_Node *epilog, unsigned reg_usage, Method_Handle mh);
    if (method_is_synchronized(fg->m_handle())) {
        keep_return_values_in_regs(fg->epilog(), result, fg->m_handle());
    }
    return result;
}
