#include "codegen_x86.h"
#include "ir.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

void cg_block_init(CgBlock* b)
{
    arena_init(&b->arena);

    b->count = 0;
    b->capacity = 64;
    b->insts = arena_alloc(&b->arena, b->capacity * sizeof(CgInst*));

    b->vreg_map = NULL;

    b->next_vreg = 1;
}

void cg_block_free(CgBlock* b)
{
    arena_free(&b->arena);
    if (b->vreg_map)
        free(b->vreg_map);
}

static const char* phys_reg_name(PhysReg r)
{
    switch (r) {
        case RAX:
            return "rax";
        case RDX:
            return "rdx";
        case RCX:
            return "rcx";
        case RBX:
            return "rbx";
        case RSI:
            return "rsi";
        case RDI:
            return "rdi";
        case R8:
            return "r8";
        case R9:
            return "r9";
        default:
            return "unknown";
    }
}

static void print_vreg(VReg v)
{
    printf("v%u", v);
}

void cg_block_print_vreg(const CgBlock* block)
{
    printf("=== CgBlock (VREG view, %zu insts) ===\n", block->count);

    for (size_t i = 0; i < block->count; i++) {
        CgInst* inst = block->insts[i];

        printf("%zu: ", i);

        switch (inst->op) {

            case CG_IMM64:
                print_vreg(inst->dst);
                printf(" = IMM64 %llu\n", (unsigned long long)inst->imm);
                break;

            case CG_ADD8:
                print_vreg(inst->dst);
                printf(" = ADD8 ");
                print_vreg(inst->binop.lhs);
                printf(", ");
                print_vreg(inst->binop.rhs);
                printf("\n");
                break;

            case CG_SUB8:
                print_vreg(inst->dst);
                printf(" = SUB8 ");
                print_vreg(inst->binop.lhs);
                printf(", ");
                print_vreg(inst->binop.rhs);
                printf("\n");
                break;

            case CG_MUL8:
                print_vreg(inst->dst);
                printf(" = MUL8 ");
                print_vreg(inst->binop.lhs);
                printf(", ");
                print_vreg(inst->binop.rhs);
                printf("\n");
                break;

            default:
                printf("???\n");
                break;
        }
    }

    printf("=====================================\n");
}

void cg_block_print_phys(const CgBlock* block)
{
    printf("=== CgBlock (PHYS REG view, %zu insts) ===\n", block->count);

    for (size_t i = 0; i < block->count; i++) {
        CgInst* inst = block->insts[i];

        printf("%zu: ", i);

        switch (inst->op) {

            case CG_IMM64:
                printf("%s = IMM64 %llu\n",
                    phys_reg_name(block->vreg_map[inst->dst].reg),
                    (unsigned long long)inst->imm);
                break;

            case CG_ADD8:
                printf("%s = ADD8 %s, %s\n",
                    phys_reg_name(block->vreg_map[inst->dst].reg),
                    phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
                    phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
                break;

            case CG_SUB8:
                printf("%s = SUB8 %s, %s\n",
                    phys_reg_name(block->vreg_map[inst->dst].reg),
                    phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
                    phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
                break;

            case CG_MUL8:
                printf("%s = MUL8 %s, %s\n",
                    phys_reg_name(block->vreg_map[inst->dst].reg),
                    phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
                    phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
                break;

            default:
                printf("? = UNKNOWN OP %d\n", inst->op);
                break;
        }
    }

    printf("========================================\n");
}

static void cg_grow(CgBlock* b)
{
    size_t new_cap = b->capacity * 2;

    CgInst** new_arr = arena_alloc(&b->arena, new_cap * sizeof(CgInst*));

    memcpy(new_arr, b->insts, b->count * sizeof(CgInst*));

    b->insts = new_arr;
    b->capacity = new_cap;
}

static CgInst* cg_emit(CgBlock* b)
{
    if (b->count == b->capacity) {
        cg_grow(b);
    }

    CgInst* inst = arena_alloc(&b->arena, sizeof(CgInst));
    b->insts[b->count++] = inst;

    return inst;
}

void ir_block_isel_x86(CgBlock* cg, const IrBlock* ir)
{
    cg->count = 0;
    cg->next_vreg = 1;

    cg->vreg_map_size = ir->next_vreg;
    cg->vreg_map = calloc(cg->vreg_map_size, sizeof(RegMap));

    for (size_t i = 0; i < ir->count; i++) {
        IrInst* inst = ir->insts[i];

        switch (inst->op) {

            case OP_INT: {
                CgInst* out = cg_emit(cg);
                out->op = CG_IMM64;
                out->dst = inst->vreg;
                out->imm = inst->value;
                break;
            }

            case OP_ADD: {
                CgInst* out = cg_emit(cg);
                out->op = CG_ADD8;
                out->dst = inst->vreg;

                out->binop.lhs = inst->operands[0]->vreg;
                out->binop.rhs = inst->operands[1]->vreg;
                break;
            }

            case OP_SUB: {
                CgInst* out = cg_emit(cg);
                out->op = CG_SUB8;
                out->dst = inst->vreg;

                out->binop.lhs = inst->operands[0]->vreg;
                out->binop.rhs = inst->operands[1]->vreg;
                break;
            }

            case OP_MUL: {
                CgInst* out = cg_emit(cg);
                out->op = CG_MUL8;
                out->dst = inst->vreg;

                out->binop.lhs = inst->operands[0]->vreg;
                out->binop.rhs = inst->operands[1]->vreg;
                break;
            }

            default:
                break;
        }

        cg->result_vreg = inst->vreg;
    }
}

static PhysReg phys_alloc(VReg vreg, RegMap* map, size_t size)
{
    // 1. reuse if already assigned
    if (vreg < size && map[vreg].assigned)
        return map[vreg].reg;

    // 2. find free register
    static PhysReg next = RAX;

    for (int i = 0; i < REG_COUNT; i++) {
        PhysReg r = (next + i) % REG_COUNT;

        int used = 0;
        for (size_t j = 0; j < size; j++) {
            if (map[j].assigned && map[j].reg == r) {
                used = 1;
                break;
            }
        }

        if (!used) {
            next = (r + 1) % REG_COUNT;
            return r;
        }
    }

    // fallback (no spilling implemented yet)
    return RAX;
}

void cg_block_regalloc_x86(CgBlock* block)
{
    size_t n = block->vreg_map_size;

    for (size_t i = 0; i < n; i++) {
        block->vreg_map[i].assigned = 0;
    }

    for (size_t i = 0; i < block->count; i++) {
        CgInst* inst = block->insts[i];

        phys_alloc(inst->dst, block->vreg_map, n);

        if (inst->op == CG_ADD8 || inst->op == CG_SUB8 || inst->op == CG_MUL8) {

            phys_alloc(inst->binop.lhs, block->vreg_map, n);
            phys_alloc(inst->binop.rhs, block->vreg_map, n);
        }
    }

    // enforce result vreg → RAX
    VReg r = block->result_vreg;

    if (r < n) {
        block->vreg_map[r].reg = RAX;
        block->vreg_map[r].assigned = 1;
    }
}

static void test_ir_block_isel_add(void)
{
    IrBlock ir;
    ir_block_init(&ir);

    // IR: 2 + 3

    Expr two = { .type = EXPR_INT, .text = "2" };
    Expr three = { .type = EXPR_INT, .text = "3" };
    Expr add_ident = { .type = EXPR_IDENT, .text = "add" };

    Expr sexpr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } };

    IrInst* result = ir_lower_expr(&ir, &sexpr);
    assert(result != NULL);

    CgBlock cg;
    cg_block_init(&cg);

    ir_block_isel_x86(&cg, &ir);

    // Expect: IMM 2, IMM 3, ADD
    assert(cg.count == 3);

    // 1. load 2
    assert(cg.insts[0]->op == CG_IMM64);
    assert(cg.insts[0]->imm == 2);

    // 2. load 3
    assert(cg.insts[1]->op == CG_IMM64);
    assert(cg.insts[1]->imm == 3);

    // 3. add
    assert(cg.insts[2]->op == CG_ADD8);

    // operands should reference IR vregs (not recomputed)
    assert(cg.insts[2]->binop.lhs == ir.insts[0]->vreg);
    assert(cg.insts[2]->binop.rhs == ir.insts[1]->vreg);

    cg_block_free(&cg);
    ir_block_free(&ir);
}

static void test_ir_block_isel_nested(void)
{
    IrBlock ir;
    ir_block_init(&ir);

    // IR: 2 + (3 * 4)

    Expr two = { .type = EXPR_INT, .text = "2" };
    Expr three = { .type = EXPR_INT, .text = "3" };
    Expr four = { .type = EXPR_INT, .text = "4" };

    Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
    Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" };

    Expr mul_expr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } };

    Expr add_expr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } };

    IrInst* result = ir_lower_expr(&ir, &add_expr);
    assert(result != NULL);

    CgBlock cg;
    cg_block_init(&cg);

    ir_block_isel_x86(&cg, &ir);

    // Expect IR produces:
    // int 2, int 3, int 4, mul, add  => 5 CG instructions

    assert(cg.count == 5);

    // 0: 2
    assert(cg.insts[0]->op == CG_IMM64);
    assert(cg.insts[0]->imm == 2);

    // 1: 3
    assert(cg.insts[1]->op == CG_IMM64);
    assert(cg.insts[1]->imm == 3);

    // 2: 4
    assert(cg.insts[2]->op == CG_IMM64);
    assert(cg.insts[2]->imm == 4);

    // 3: mul
    assert(cg.insts[3]->op == CG_MUL8);

    // 4: add
    assert(cg.insts[4]->op == CG_ADD8);

    // MUL operands: 3 * 4
    assert(cg.insts[3]->binop.lhs == ir.insts[1]->vreg);
    assert(cg.insts[3]->binop.rhs == ir.insts[2]->vreg);

    // ADD operands: 2 + (3*4)
    assert(cg.insts[4]->binop.lhs == ir.insts[0]->vreg);
    assert(cg.insts[4]->binop.rhs == ir.insts[3]->vreg);

    cg_block_free(&cg);
    ir_block_free(&ir);
}

static void test_cg_block_regalloc_add(void)
{
    // Build IR: 2 + 3
    IrBlock ir;
    ir_block_init(&ir);

    Expr two = { .type = EXPR_INT, .text = "2" };
    Expr three = { .type = EXPR_INT, .text = "3" };
    Expr add_ident = { .type = EXPR_IDENT, .text = "add" };

    Expr sexpr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } };

    IrInst* result = ir_lower_expr(&ir, &sexpr);
    assert(result != NULL);

    CgBlock cg;
    cg_block_init(&cg);

    ir_block_isel_x86(&cg, &ir);

    cg_block_regalloc_x86(&cg);

    // After RA: ADD must use physical registers, not vregs
    CgInst* add = cg.insts[2];

    assert(add->op == CG_ADD8);

    PhysReg lhs = add->binop.lhs;
    PhysReg rhs = add->binop.rhs;
    PhysReg dst = add->dst;

    // sanity: registers must be in valid range
    assert(lhs < REG_COUNT);
    assert(rhs < REG_COUNT);
    assert(dst < REG_COUNT);

    // lhs and rhs should not equal invalid sentinel values
    assert(lhs != (PhysReg)-1);
    assert(rhs != (PhysReg)-1);

    cg_block_free(&cg);
    ir_block_free(&ir);
}

static void test_cg_block_regalloc_nested(void)
{
    // IR: 2 + (3 * 4)
    IrBlock ir;
    ir_block_init(&ir);

    Expr two = { .type = EXPR_INT, .text = "2" };
    Expr three = { .type = EXPR_INT, .text = "3" };
    Expr four = { .type = EXPR_INT, .text = "4" };

    Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
    Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" };

    Expr mul_expr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } };

    Expr add_expr = { .type = EXPR_SEXPR,
        .sexpr
        = { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } };

    IrInst* result = ir_lower_expr(&ir, &add_expr);
    assert(result != NULL);

    CgBlock cg;
    cg_block_init(&cg);

    ir_block_isel_x86(&cg, &ir);

    cg_block_regalloc_x86(&cg);

    // ADD instruction is last
    CgInst* add = cg.insts[4];
    assert(add->op == CG_ADD8);

    // MUL instruction
    CgInst* mul = cg.insts[3];
    assert(mul->op == CG_MUL8);

    // Check register validity
    assert(add->binop.lhs < REG_COUNT);
    assert(add->binop.rhs < REG_COUNT);
    assert(add->dst < REG_COUNT);

    assert(mul->binop.lhs < REG_COUNT);
    assert(mul->binop.rhs < REG_COUNT);
    assert(mul->dst < REG_COUNT);

    // Critical correctness check:
    // MUL result should be used by ADD
    assert(add->binop.rhs == mul->dst);

    cg_block_free(&cg);
    ir_block_free(&ir);
}

void test_codegen_x86(void)
{
    test_ir_block_isel_add();
    test_ir_block_isel_nested();
    test_cg_block_regalloc_add();
    test_cg_block_regalloc_nested();
}