commit e2c5f1c66eb67eea44b14aef8026e18f01745835 Author: sfja Date: Thu May 21 00:31:25 2026 +0200 init diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..a56cbd0 --- /dev/null +++ b/.clang-format @@ -0,0 +1,14 @@ +Language: Cpp +BasedOnStyle: WebKit +IndentWidth: 4 +ColumnLimit: 80 +IndentCaseLabels: true +InsertNewlineAtEOF: true +AllowShortFunctionsOnASingleLine: None + +BinPackArguments: false +AllowAllArgumentsOnNextLine: true + +BinPackParameters: false +AllowAllParametersOfDeclarationOnNextLine: true + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..6f31401 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +build/ +.vscode/ diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..2c96a43 --- /dev/null +++ b/Makefile @@ -0,0 +1,25 @@ +CC = gcc + +CFLAGS = -std=c23 \ + -Wall \ + -Wextra \ + -pedantic-errors \ + -g \ + -fsanitize=address + +BUILD_DIR = build +TARGET = $(BUILD_DIR)/main +SRC = main.c parse.c ir.c arena.c codegen_x86.c jit_x86.c + +all: $(TARGET) + +$(BUILD_DIR): + mkdir -p $(BUILD_DIR) + +$(TARGET): $(SRC) | $(BUILD_DIR) + $(CC) $(CFLAGS) $(SRC) -o $(TARGET) + +clean: + rm -rf $(BUILD_DIR) + +.PHONY: all clean \ No newline at end of file diff --git a/arena.c b/arena.c new file mode 100644 index 0000000..9d52f56 --- /dev/null +++ b/arena.c @@ -0,0 +1,43 @@ +#include "arena.h" +#include +#include +#include + +void arena_init(Arena* a) +{ + a->data = NULL; + a->size = 0; + a->capacity = 0; +} + +void* arena_alloc(Arena* a, size_t size) +{ + // simple alignment to 8 bytes + size = (size + 7) & ~((size_t)7); + + if (a->size + size > a->capacity) { + size_t new_cap = a->capacity ? 
a->capacity * 2 : 1024; + + while (new_cap < a->size + size) + new_cap *= 2; + + uint8_t* new_data = realloc(a->data, new_cap); + if (!new_data) + return NULL; + + a->data = new_data; + a->capacity = new_cap; + } + + void* ptr = a->data + a->size; + a->size += size; + return ptr; +} + +void arena_free(Arena* a) +{ + free(a->data); + a->data = NULL; + a->size = 0; + a->capacity = 0; +} diff --git a/arena.h b/arena.h new file mode 100644 index 0000000..537bcd7 --- /dev/null +++ b/arena.h @@ -0,0 +1,18 @@ +#ifndef ARENA_H +#define ARENA_H + +#include +#include +#include + +typedef struct { + uint8_t* data; + size_t size; + size_t capacity; +} Arena; + +void arena_init(Arena* a); +void* arena_alloc(Arena* a, size_t size); +void arena_free(Arena* a); + +#endif diff --git a/codegen_x86.c b/codegen_x86.c new file mode 100644 index 0000000..e2970d5 --- /dev/null +++ b/codegen_x86.c @@ -0,0 +1,517 @@ +#include "codegen_x86.h" +#include "ir.h" +#include +#include +#include +#include +#include + +void cg_block_init(CgBlock* b) +{ + arena_init(&b->arena); + + b->count = 0; + b->capacity = 64; + b->insts = arena_alloc(&b->arena, b->capacity * sizeof(CgInst*)); + + b->vreg_map = NULL; + + b->next_vreg = 1; +} + +void cg_block_free(CgBlock* b) +{ + arena_free(&b->arena); + if (b->vreg_map) + free(b->vreg_map); +} + +static const char* phys_reg_name(PhysReg r) +{ + switch (r) { + case RAX: + return "rax"; + case RDX: + return "rdx"; + case RCX: + return "rcx"; + case RBX: + return "rbx"; + case RSI: + return "rsi"; + case RDI: + return "rdi"; + case R8: + return "r8"; + case R9: + return "r9"; + default: + return "unknown"; + } +} + +static void print_vreg(VReg v) +{ + printf("v%u", v); +} + +void cg_block_print_vreg(const CgBlock* block) +{ + printf("=== CgBlock (VREG view, %zu insts) ===\n", block->count); + + for (size_t i = 0; i < block->count; i++) { + CgInst* inst = block->insts[i]; + + printf("%zu: ", i); + + switch (inst->op) { + + case CG_IMM64: + print_vreg(inst->dst); + 
printf(" = IMM64 %llu\n", (unsigned long long)inst->imm); + break; + + case CG_ADD8: + print_vreg(inst->dst); + printf(" = ADD8 "); + print_vreg(inst->binop.lhs); + printf(", "); + print_vreg(inst->binop.rhs); + printf("\n"); + break; + + case CG_SUB8: + print_vreg(inst->dst); + printf(" = SUB8 "); + print_vreg(inst->binop.lhs); + printf(", "); + print_vreg(inst->binop.rhs); + printf("\n"); + break; + + case CG_MUL8: + print_vreg(inst->dst); + printf(" = MUL8 "); + print_vreg(inst->binop.lhs); + printf(", "); + print_vreg(inst->binop.rhs); + printf("\n"); + break; + + default: + printf("???\n"); + break; + } + } + + printf("=====================================\n"); +} + +void cg_block_print_phys(const CgBlock* block) +{ + printf("=== CgBlock (PHYS REG view, %zu insts) ===\n", block->count); + + for (size_t i = 0; i < block->count; i++) { + CgInst* inst = block->insts[i]; + + printf("%zu: ", i); + + switch (inst->op) { + + case CG_IMM64: + printf("%s = IMM64 %llu\n", + phys_reg_name(block->vreg_map[inst->dst].reg), + (unsigned long long)inst->imm); + break; + + case CG_ADD8: + printf("%s = ADD8 %s, %s\n", + phys_reg_name(block->vreg_map[inst->dst].reg), + phys_reg_name(block->vreg_map[inst->binop.lhs].reg), + phys_reg_name(block->vreg_map[inst->binop.rhs].reg)); + break; + + case CG_SUB8: + printf("%s = SUB8 %s, %s\n", + phys_reg_name(block->vreg_map[inst->dst].reg), + phys_reg_name(block->vreg_map[inst->binop.lhs].reg), + phys_reg_name(block->vreg_map[inst->binop.rhs].reg)); + break; + + case CG_MUL8: + printf("%s = MUL8 %s, %s\n", + phys_reg_name(block->vreg_map[inst->dst].reg), + phys_reg_name(block->vreg_map[inst->binop.lhs].reg), + phys_reg_name(block->vreg_map[inst->binop.rhs].reg)); + break; + + default: + printf("? 
= UNKNOWN OP %d\n", inst->op); + break; + } + } + + printf("========================================\n"); +} + +static void cg_grow(CgBlock* b) +{ + size_t new_cap = b->capacity * 2; + + CgInst** new_arr = arena_alloc(&b->arena, new_cap * sizeof(CgInst*)); + + memcpy(new_arr, b->insts, b->count * sizeof(CgInst*)); + + b->insts = new_arr; + b->capacity = new_cap; +} + +static CgInst* cg_emit(CgBlock* b) +{ + if (b->count == b->capacity) { + cg_grow(b); + } + + CgInst* inst = arena_alloc(&b->arena, sizeof(CgInst)); + b->insts[b->count++] = inst; + + return inst; +} + +void ir_block_isel_x86(CgBlock* cg, const IrBlock* ir) +{ + cg->count = 0; + cg->next_vreg = 1; + + cg->vreg_map_size = ir->next_vreg; + cg->vreg_map = calloc(cg->vreg_map_size, sizeof(RegMap)); + + for (size_t i = 0; i < ir->count; i++) { + IrInst* inst = ir->insts[i]; + + switch (inst->op) { + + case OP_INT: { + CgInst* out = cg_emit(cg); + out->op = CG_IMM64; + out->dst = inst->vreg; + out->imm = inst->value; + break; + } + + case OP_ADD: { + CgInst* out = cg_emit(cg); + out->op = CG_ADD8; + out->dst = inst->vreg; + + out->binop.lhs = inst->operands[0]->vreg; + out->binop.rhs = inst->operands[1]->vreg; + break; + } + + case OP_SUB: { + CgInst* out = cg_emit(cg); + out->op = CG_SUB8; + out->dst = inst->vreg; + + out->binop.lhs = inst->operands[0]->vreg; + out->binop.rhs = inst->operands[1]->vreg; + break; + } + + case OP_MUL: { + CgInst* out = cg_emit(cg); + out->op = CG_MUL8; + out->dst = inst->vreg; + + out->binop.lhs = inst->operands[0]->vreg; + out->binop.rhs = inst->operands[1]->vreg; + break; + } + + default: + break; + } + + cg->result_vreg = inst->vreg; + } +} + +static PhysReg phys_alloc(VReg vreg, RegMap* map, size_t size) +{ + // 1. reuse if already assigned + if (vreg < size && map[vreg].assigned) + return map[vreg].reg; + + // 2. 
find free register + static PhysReg next = RAX; + + for (int i = 0; i < REG_COUNT; i++) { + PhysReg r = (next + i) % REG_COUNT; + + int used = 0; + for (size_t j = 0; j < size; j++) { + if (map[j].assigned && map[j].reg == r) { + used = 1; + break; + } + } + + if (!used) { + next = (r + 1) % REG_COUNT; + return r; + } + } + + // fallback (no spilling implemented yet) + return RAX; +} + +void cg_block_regalloc_x86(CgBlock* block) +{ + size_t n = block->vreg_map_size; + + for (size_t i = 0; i < n; i++) { + block->vreg_map[i].assigned = 0; + } + + for (size_t i = 0; i < block->count; i++) { + CgInst* inst = block->insts[i]; + + phys_alloc(inst->dst, block->vreg_map, n); + + if (inst->op == CG_ADD8 || inst->op == CG_SUB8 || inst->op == CG_MUL8) { + + phys_alloc(inst->binop.lhs, block->vreg_map, n); + phys_alloc(inst->binop.rhs, block->vreg_map, n); + } + } + + // enforce result vreg → RAX + VReg r = block->result_vreg; + + if (r < n) { + block->vreg_map[r].reg = RAX; + block->vreg_map[r].assigned = 1; + } +} + +static void test_ir_block_isel_add(void) +{ + IrBlock ir; + ir_block_init(&ir); + + // IR: 2 + 3 + + Expr two = { .type = EXPR_INT, .text = "2" }; + Expr three = { .type = EXPR_INT, .text = "3" }; + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + + Expr sexpr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&ir, &sexpr); + assert(result != NULL); + + CgBlock cg; + cg_block_init(&cg); + + ir_block_isel_x86(&cg, &ir); + + // Expect: IMM 2, IMM 3, ADD + assert(cg.count == 3); + + // 1. load 2 + assert(cg.insts[0]->op == CG_IMM64); + assert(cg.insts[0]->imm == 2); + + // 2. load 3 + assert(cg.insts[1]->op == CG_IMM64); + assert(cg.insts[1]->imm == 3); + + // 3. 
add + assert(cg.insts[2]->op == CG_ADD8); + + // operands should reference IR vregs (not recomputed) + assert(cg.insts[2]->binop.lhs == ir.insts[0]->vreg); + assert(cg.insts[2]->binop.rhs == ir.insts[1]->vreg); + + cg_block_free(&cg); + ir_block_free(&ir); +} + +static void test_ir_block_isel_nested(void) +{ + IrBlock ir; + ir_block_init(&ir); + + // IR: 2 + (3 * 4) + + Expr two = { .type = EXPR_INT, .text = "2" }; + Expr three = { .type = EXPR_INT, .text = "3" }; + Expr four = { .type = EXPR_INT, .text = "4" }; + + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" }; + + Expr mul_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } }; + + Expr add_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&ir, &add_expr); + assert(result != NULL); + + CgBlock cg; + cg_block_init(&cg); + + ir_block_isel_x86(&cg, &ir); + + // Expect IR produces: + // int 2, int 3, int 4, mul, add => 5 CG instructions + + assert(cg.count == 5); + + // 0: 2 + assert(cg.insts[0]->op == CG_IMM64); + assert(cg.insts[0]->imm == 2); + + // 1: 3 + assert(cg.insts[1]->op == CG_IMM64); + assert(cg.insts[1]->imm == 3); + + // 2: 4 + assert(cg.insts[2]->op == CG_IMM64); + assert(cg.insts[2]->imm == 4); + + // 3: mul + assert(cg.insts[3]->op == CG_MUL8); + + // 4: add + assert(cg.insts[4]->op == CG_ADD8); + + // MUL operands: 3 * 4 + assert(cg.insts[3]->binop.lhs == ir.insts[1]->vreg); + assert(cg.insts[3]->binop.rhs == ir.insts[2]->vreg); + + // ADD operands: 2 + (3*4) + assert(cg.insts[4]->binop.lhs == ir.insts[0]->vreg); + assert(cg.insts[4]->binop.rhs == ir.insts[3]->vreg); + + cg_block_free(&cg); + ir_block_free(&ir); +} + +static void test_cg_block_regalloc_add(void) +{ + // Build IR: 2 + 3 + IrBlock ir; + ir_block_init(&ir); + + Expr two = { .type = EXPR_INT, .text = "2" }; + Expr three = 
{ .type = EXPR_INT, .text = "3" }; + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + + Expr sexpr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&ir, &sexpr); + assert(result != NULL); + + CgBlock cg; + cg_block_init(&cg); + + ir_block_isel_x86(&cg, &ir); + + cg_block_regalloc_x86(&cg); + + // After RA: ADD must use physical registers, not vregs + CgInst* add = cg.insts[2]; + + assert(add->op == CG_ADD8); + + PhysReg lhs = add->binop.lhs; + PhysReg rhs = add->binop.rhs; + PhysReg dst = add->dst; + + // sanity: registers must be in valid range + assert(lhs < REG_COUNT); + assert(rhs < REG_COUNT); + assert(dst < REG_COUNT); + + // lhs and rhs should not equal invalid sentinel values + assert(lhs != (PhysReg)-1); + assert(rhs != (PhysReg)-1); + + cg_block_free(&cg); + ir_block_free(&ir); +} + +static void test_cg_block_regalloc_nested(void) +{ + // IR: 2 + (3 * 4) + IrBlock ir; + ir_block_init(&ir); + + Expr two = { .type = EXPR_INT, .text = "2" }; + Expr three = { .type = EXPR_INT, .text = "3" }; + Expr four = { .type = EXPR_INT, .text = "4" }; + + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" }; + + Expr mul_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } }; + + Expr add_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&ir, &add_expr); + assert(result != NULL); + + CgBlock cg; + cg_block_init(&cg); + + ir_block_isel_x86(&cg, &ir); + + cg_block_regalloc_x86(&cg); + + // ADD instruction is last + CgInst* add = cg.insts[4]; + assert(add->op == CG_ADD8); + + // MUL instruction + CgInst* mul = cg.insts[3]; + assert(mul->op == CG_MUL8); + + // Check register validity + assert(add->binop.lhs < REG_COUNT); + assert(add->binop.rhs < REG_COUNT); + 
assert(add->dst < REG_COUNT); + + assert(mul->binop.lhs < REG_COUNT); + assert(mul->binop.rhs < REG_COUNT); + assert(mul->dst < REG_COUNT); + + // Critical correctness check: + // MUL result should be used by ADD + assert(add->binop.rhs == mul->dst); + + cg_block_free(&cg); + ir_block_free(&ir); +} + +void test_codegen_x86(void) +{ + test_ir_block_isel_add(); + test_ir_block_isel_nested(); + test_cg_block_regalloc_add(); + test_cg_block_regalloc_nested(); +} diff --git a/codegen_x86.h b/codegen_x86.h new file mode 100644 index 0000000..d010f01 --- /dev/null +++ b/codegen_x86.h @@ -0,0 +1,73 @@ +#ifndef CODEGEN_X86_H +#define CODEGEN_X86_H + +#include "arena.h" +#include "ir.h" +#include +#include + +typedef enum { + CG_MOV, + CG_ADD8, + CG_SUB8, + CG_MUL8, + CG_IMM64 +} CgOpCode; + +typedef enum { + RAX = 0, + RDX, + RCX, + RBX, + RSI, + RDI, + R8, + R9, + REG_COUNT +} PhysReg; + +typedef struct { + PhysReg reg; + int assigned; +} RegMap; + +typedef struct CgInst { + CgOpCode op; + + VReg dst; + + union { + struct { + VReg lhs; + VReg rhs; + } binop; + + uint64_t imm; + }; +} CgInst; + +typedef struct { + CgInst** insts; + size_t count; + size_t capacity; + + VReg next_vreg; + RegMap* vreg_map; + size_t vreg_map_size; + + Arena arena; + VReg result_vreg; +} CgBlock; + + +void cg_block_init(CgBlock* b); +void cg_block_free(CgBlock* b); +void cg_block_print_vreg(const CgBlock* block); +void cg_block_print_phys(const CgBlock* block); + +void ir_block_isel_x86(CgBlock* cg, const IrBlock* ir); +void cg_block_regalloc_x86(CgBlock* block); + +void test_codegen_x86(void); + +#endif diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..069ad2b --- /dev/null +++ b/compile_flags.txt @@ -0,0 +1,6 @@ +-xc +-std=c23 +-Wall +-Wextra +-pedantic-errors +-Wno-empty-translation-unit diff --git a/ir.c b/ir.c new file mode 100644 index 0000000..482cdd0 --- /dev/null +++ b/ir.c @@ -0,0 +1,328 @@ +#include "ir.h" +#include "arena.h" +#include "parse.h" 
+#include +#include +#include +#include +#include +#include + +void ir_block_init(IrBlock* block) +{ + arena_init(&block->arena); + + block->next_vreg = 0; + block->insts = NULL; + block->count = 0; + block->capacity = 0; +} + +void ir_block_free(IrBlock* block) +{ + if (!block) + return; + + arena_free(&block->arena); + free(block->insts); +} + +static int ir_inst_index(IrBlock* block, IrInst* inst) +{ + for (size_t i = 0; i < block->count; i++) { + if (block->insts[i] == inst) + return (int)i; + } + return -1; +} + +void ir_block_print(IrBlock* block) +{ + if (!block) + return; + + for (size_t i = 0; i < block->count; i++) { + IrInst* inst = block->insts[i]; + + printf("%%%zu = ", i); + + switch (inst->op) { + case OP_INT: + printf("Int %llu\n", (unsigned long long)inst->value); + break; + + case OP_ADD: + case OP_SUB: + case OP_MUL: { + const char* op_str = inst->op == OP_ADD ? "Add" + : inst->op == OP_SUB ? "Sub" + : "Mul"; + + printf("%s ", op_str); + + for (size_t j = 0; j < inst->operand_count; j++) { + int idx = ir_inst_index(block, inst->operands[j]); + + if (idx < 0) { + printf(""); + } else { + printf("%%%d", idx); + } + + if (j + 1 < inst->operand_count) + printf(", "); + } + + printf("\n"); + break; + } + + default: + printf("UnknownOp\n"); + break; + } + } +} + +static void ir_block_emit(IrBlock* block, IrInst* inst) +{ + if (!block || !inst) + return; + + if (block->count == block->capacity) { + size_t new_cap = block->capacity ? 
// Parses `s` as an unsigned decimal integer literal.
//
// Accepts only a non-empty run of the digits 0-9: no sign, no whitespace, no
// other characters. (strtoull would also accept leading blanks and "+"/"-",
// and silently saturate on overflow, so the digits are accumulated by hand.)
//
// On success stores the value in *out and returns true. Returns false, leaving
// *out untouched, when `s` is NULL, empty, malformed, or does not fit in
// 64 bits.
static bool is_int_literal(const char* s, uint64_t* out)
{
    if (!s || !*s)
        return false;

    uint64_t value = 0;

    for (const char* p = s; *p != '\0'; p++) {
        if (*p < '0' || *p > '9')
            return false;

        uint64_t digit = (uint64_t)(*p - '0');

        // value * 10 + digit must stay <= UINT64_MAX
        if (value > (UINT64_MAX - digit) / 10)
            return false;

        value = value * 10 + digit;
    }

    *out = value;
    return true;
}
expr->sexpr.items[0]; + if (!head || head->type != EXPR_IDENT) { + return NULL; // strict: first element must be operator + } + + OpCode op = op_from_ident_strict(head->text); + if (op == (OpCode)-1) { + return NULL; // unknown operator + } + + size_t argc = n - 1; + + // STRICT ARITY RULES (you can adjust these) + if (argc < 2) { + return NULL; // e.g. (add x) is invalid + } + + if (argc > IR_MAX_ARITY) { + return NULL; // prevent overflow + } + + // Lower first operand + IrInst* first = ir_lower_expr(block, expr->sexpr.items[1]); + if (!first) + return NULL; + + IrInst* acc = first; + + // Left-associative lowering: + // (add a b c d) => (((a + b) + c) + d) + for (size_t i = 2; i < n; i++) { + IrInst* rhs = ir_lower_expr(block, expr->sexpr.items[i]); + if (!rhs) + return NULL; + + IrInst* tmp = ir_new_binop(block, op, acc, rhs); + if (!tmp) + return NULL; + + acc = tmp; + } + + return acc; + } + } + + return NULL; +} + +static void test_ir_lower_simple_add(void) +{ + IrBlock block; + ir_block_init(&block); + + // Build AST: (add 2 3) + Expr two = { .type = EXPR_INT, .text = "2" }; + Expr three = { .type = EXPR_INT, .text = "3" }; + + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + + Expr sexpr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&block, &sexpr); + assert(result != NULL); + + // Check instruction count + assert(block.count == 3); + + // Int 2 + assert(block.insts[0]->op == OP_INT); + assert(block.insts[0]->value == 2); + + // Int 3 + assert(block.insts[1]->op == OP_INT); + assert(block.insts[1]->value == 3); + + // Add + assert(block.insts[2]->op == OP_ADD); + + // Operand wiring + assert(block.insts[2]->operands[0] == block.insts[0]); + assert(block.insts[2]->operands[1] == block.insts[1]); + + ir_block_free(&block); +} + +static void test_ir_lower_nested_expr(void) +{ + IrBlock block; + ir_block_init(&block); + + Expr two = { .type = EXPR_INT, .text = 
"2" }; + Expr three = { .type = EXPR_INT, .text = "3" }; + Expr four = { .type = EXPR_INT, .text = "4" }; + + Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" }; + Expr add_ident = { .type = EXPR_IDENT, .text = "add" }; + + Expr mul_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } }; + + Expr add_expr = { .type = EXPR_SEXPR, + .sexpr + = { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } }; + + IrInst* result = ir_lower_expr(&block, &add_expr); + assert(result != NULL); + + assert(block.count == 5); + + assert(block.insts[0]->op == OP_INT && block.insts[0]->value == 2); + assert(block.insts[1]->op == OP_INT && block.insts[1]->value == 3); + assert(block.insts[2]->op == OP_INT && block.insts[2]->value == 4); + + assert(block.insts[3]->op == OP_MUL); + assert(block.insts[4]->op == OP_ADD); + + // Verify MUL operands + assert(block.insts[3]->operands[0] == block.insts[1]); + assert(block.insts[3]->operands[1] == block.insts[2]); + + // Verify ADD operands + assert(block.insts[4]->operands[0] == block.insts[0]); + assert(block.insts[4]->operands[1] == block.insts[3]); + + ir_block_free(&block); +} + +void test_ast_lower(void) +{ + test_ir_lower_simple_add(); + test_ir_lower_nested_expr(); +} diff --git a/ir.h b/ir.h new file mode 100644 index 0000000..eb49111 --- /dev/null +++ b/ir.h @@ -0,0 +1,54 @@ +#ifndef IR_H +#define IR_H + +#include "arena.h" +#include "parse.h" +#include +#include + +typedef uint32_t VReg; + +#define IR_MAX_ARITY 8 // adjust as needed + +typedef enum { + OP_INT, + OP_ADD, + OP_SUB, + OP_MUL +} OpCode; + +typedef struct IrInst IrInst; + +struct IrInst { + OpCode op; + VReg vreg; + + union { + uint64_t value; // OP_INT + + struct { + size_t operand_count; + IrInst* operands[IR_MAX_ARITY]; // inline storage + }; + }; +}; + + +typedef struct { + Arena arena; + VReg next_vreg; + + IrInst** insts; + size_t count; + size_t capacity; +} IrBlock; + +void ir_block_init(IrBlock* 
block); +void ir_block_free(IrBlock* block); +void ir_block_print(IrBlock* block); + +IrInst* ir_lower_expr(IrBlock* block, const Expr* expr); + +void test_ast_lower(void); + +#endif diff --git a/jit_x86.c b/jit_x86.c new file mode 100644 index 0000000..e210575 --- /dev/null +++ b/jit_x86.c @@ -0,0 +1,188 @@ +#include "jit_x86.h" +#include "codegen_x86.h" +#include +#include +#include +#include +#include +#include + +static uint8_t reg_enc(PhysReg r) +{ + switch (r) { + case RAX: + return 0; + case RCX: + return 1; + case RDX: + return 2; + case RBX: + return 3; + case RSI: + return 6; + case RDI: + return 7; + case R8: + return 8; + case R9: + return 9; + default: + return 0; + } +} + +typedef struct { + uint8_t* buf; + size_t cap; + size_t len; +} AsmBuf; + +static void emit8(AsmBuf* a, uint8_t v) +{ + if (a->len >= a->cap) { + fprintf(stderr, "JIT buffer overflow\n"); + abort(); + } + a->buf[a->len++] = v; +} + +[[maybe_unused]] +static void emit32(AsmBuf* a, uint32_t v) +{ + if (a->len >= a->cap) { + fprintf(stderr, "JIT buffer overflow\n"); + abort(); + } + memcpy(&a->buf[a->len], &v, 4); + a->len += 4; +} + +static void emit64(AsmBuf* a, uint64_t v) +{ + if (a->len >= a->cap) { + fprintf(stderr, "JIT buffer overflow\n"); + abort(); + } + memcpy(&a->buf[a->len], &v, 8); + a->len += 8; +} + +static void emit_mov_imm64(AsmBuf* a, PhysReg dst, uint64_t imm) +{ + uint8_t r = reg_enc(dst); + + // mov r64, imm64 = 48 B8+rd imm64 + emit8(a, 0x48); + emit8(a, 0xB8 + r); + emit64(a, imm); +} + +static uint8_t rex_enc(uint8_t dst, uint8_t src) +{ + return 0x40 | ((dst & 8) ? 0x01 : 0) | // B + ((src & 8) ? 
0x04 : 0); // R +} + +static void emit_alu_rr( + AsmBuf* a, uint8_t rex, uint8_t op, PhysReg dst, PhysReg src) +{ + (void)rex; + + uint8_t d = reg_enc(dst); + uint8_t s = reg_enc(src); + + emit8(a, rex_enc(d, s)); + emit8(a, op); + + uint8_t modrm = 0xC0 | ((s & 7) << 3) | (d & 7); + emit8(a, modrm); +} + +static void emit_add(AsmBuf* a, PhysReg d, PhysReg s) +{ + emit_alu_rr(a, 0x48, 0x01, d, s); +} + +static void emit_sub(AsmBuf* a, PhysReg d, PhysReg s) +{ + emit_alu_rr(a, 0x48, 0x29, d, s); +} + +static void emit_mul(AsmBuf* a, PhysReg d, PhysReg s) +{ + emit8(a, 0x48); + emit8(a, 0x0F); + emit8(a, 0xAF); + emit8(a, 0xC0 | (reg_enc(s) << 3) | reg_enc(d)); +} + +static void emit_ret(AsmBuf* a) +{ + emit8(a, 0xC3); +} + +static size_t align_page(size_t n) +{ + size_t page = sysconf(_SC_PAGESIZE); + return (n + page - 1) & ~(page - 1); +} + +JitFn cg_block_emit_x86_machine_code(CgBlock* block) +{ + AsmBuf a = { 0 }; + + a.cap = align_page(1024); + a.len = 0; + a.buf = mmap(NULL, + a.cap, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, + -1, + 0); + + if (a.buf == MAP_FAILED) { + return NULL; + } + + for (size_t i = 0; i < block->count; i++) { + CgInst* inst = block->insts[i]; + + switch (inst->op) { + + case CG_IMM64: + emit_mov_imm64(&a, inst->dst, inst->imm); + break; + + case CG_ADD8: + emit_add(&a, inst->dst, inst->binop.rhs); + break; + + case CG_SUB8: + emit_sub(&a, inst->dst, inst->binop.rhs); + break; + + case CG_MUL8: + emit_mul(&a, inst->dst, inst->binop.rhs); + break; + + default: + break; + } + } + + // ensure result is in rax (ABI return register) + emit_ret(&a); + + // make executable + mprotect(a.buf, align_page(a.len), PROT_READ | PROT_EXEC); + + for (size_t i = 0; i < a.len; i++) { + printf("%02x ", a.buf[i]); + } + printf("\n"); + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" + return (JitFn)a.buf; +#pragma GCC diagnostic pop +} diff --git a/jit_x86.h b/jit_x86.h new file mode 100644 index 0000000..c4226e7 --- 
/dev/null +++ b/jit_x86.h @@ -0,0 +1,11 @@ +#ifndef JIT_X86_H +#define JIT_X86_H + +#include "codegen_x86.h" +#include + +typedef uint64_t (*JitFn)(void); + +JitFn cg_block_emit_x86_machine_code(CgBlock* block); + +#endif \ No newline at end of file diff --git a/main.c b/main.c new file mode 100644 index 0000000..0779f39 --- /dev/null +++ b/main.c @@ -0,0 +1,105 @@ +#include "codegen_x86.h" +#include "ir.h" +#include "jit_x86.h" +#include "parse.h" +#include +#include +#include +#include + +static char* read_file(const char* filename) +{ + FILE* file = fopen(filename, "rb"); + + if (!file) { + perror("fopen"); + return NULL; + } + + if (fseek(file, 0, SEEK_END) != 0) { + perror("fseek"); + fclose(file); + return NULL; + } + + long size = ftell(file); + + if (size < 0) { + perror("ftell"); + fclose(file); + return NULL; + } + + rewind(file); + + char* buffer = malloc((size_t)size + 1); + + if (!buffer) { + fprintf(stderr, "Out of memory\n"); + fclose(file); + return NULL; + } + + size_t bytes_read = fread(buffer, 1, (size_t)size, file); + + if (bytes_read != (size_t)size) { + perror("fread"); + free(buffer); + fclose(file); + return NULL; + } + + buffer[size] = '\0'; + + fclose(file); + + return buffer; +} + +int main(int argc, char** argv) +{ + if (argc > 1 && strcmp(argv[1], "--test") == 0) { + test_parse(); + test_ast_lower(); + test_codegen_x86(); + return 0; + } + + assert(argc > 1); + char* text = read_file(argv[1]); + printf("--- text ---\n"); + puts(text); + + Expr* expr = parse(text); + free(text); + printf("--- ast ---\n"); + expr_print(expr); + + IrBlock ir_block; + ir_block_init(&ir_block); + + ir_lower_expr(&ir_block, expr); + expr_free(expr); + printf("\n--- ir ---\n"); + ir_block_print(&ir_block); + + CgBlock cg_block; + cg_block_init(&cg_block); + + ir_block_isel_x86(&cg_block, &ir_block); + ir_block_free(&ir_block); + printf("--- isel ---\n"); + cg_block_print_vreg(&cg_block); + + printf("--- regalloc ---\n"); + cg_block_regalloc_x86(&cg_block); + 
cg_block_print_phys(&cg_block); + + JitFn fn = cg_block_emit_x86_machine_code(&cg_block); + cg_block_free(&cg_block); + printf("--- result ---\n"); + uint64_t result = fn(); + printf("%lu\n", result); + + return 0; +} diff --git a/parse.c b/parse.c new file mode 100644 index 0000000..7e7c3cc --- /dev/null +++ b/parse.c @@ -0,0 +1,354 @@ +// parser.c +#include "parse.h" +#include +#include +#include +#include +#include + +typedef enum { + TOKEN_LPAREN, + TOKEN_RPAREN, + TOKEN_IDENT, + TOKEN_INT, + TOKEN_EOF, +} TokenType; + +typedef struct { + TokenType type; + char* text; +} Token; + +typedef struct { + const char* input; + size_t pos; +} Lexer; + +typedef struct { + Lexer lexer; + Token current; +} Parser; + +/* ========================= + Lexer + ========================= */ + +static char current_char(Lexer* lexer) +{ + return lexer->input[lexer->pos]; +} + +static void advance(Lexer* lexer) +{ + if (current_char(lexer) != '\0') { + lexer->pos++; + } +} + +static void skip_whitespace(Lexer* lexer) +{ + while (isspace(current_char(lexer))) { + advance(lexer); + } +} + +static Token make_ident(Lexer* lexer) +{ + size_t start = lexer->pos; + + while (isalnum(current_char(lexer)) || current_char(lexer) == '_') { + advance(lexer); + } + + return (Token) { + .type = TOKEN_IDENT, + .text = strndup(lexer->input + start, lexer->pos - start), + }; +} + +static Token make_int(Lexer* lexer) +{ + size_t start = lexer->pos; + + while (isdigit(current_char(lexer))) { + advance(lexer); + } + + return (Token) { + .type = TOKEN_INT, + .text = strndup(lexer->input + start, lexer->pos - start), + }; +} + +static Token next_token(Lexer* lexer) +{ + skip_whitespace(lexer); + + char c = current_char(lexer); + + switch (c) { + case '\0': + return (Token) { + .type = TOKEN_EOF, + .text = NULL, + }; + + case '(': + advance(lexer); + + return (Token) { + .type = TOKEN_LPAREN, + .text = NULL, + }; + + case ')': + advance(lexer); + + return (Token) { + .type = TOKEN_RPAREN, + .text = NULL, 
+ }; + + default: + break; + } + + if (isalpha(c) || c == '_') { + return make_ident(lexer); + } + + if (isdigit(c)) { + return make_int(lexer); + } + + fprintf(stderr, "Unexpected character: '%c'\n", c); + exit(EXIT_FAILURE); +} + +/* ========================= + Parser + ========================= */ + +static void parser_advance(Parser* parser) +{ + free(parser->current.text); + parser->current = next_token(&parser->lexer); +} + +static void parser_expect(Parser* parser, TokenType expected) +{ + if (parser->current.type != expected) { + fprintf(stderr, "Unexpected token\n"); + exit(EXIT_FAILURE); + } +} + +static Expr* make_atom_expr(ExprType type, char* text) +{ + Expr* expr = malloc(sizeof(Expr)); + + expr->type = type; + expr->text = text; + + return expr; +} + +static Expr* make_sexpr(void) +{ + Expr* expr = malloc(sizeof(Expr)); + + expr->type = EXPR_SEXPR; + expr->sexpr.items = NULL; + expr->sexpr.count = 0; + + return expr; +} + +static void sexpr_push(Expr* sexpr, Expr* item) +{ + sexpr->sexpr.items + = realloc(sexpr->sexpr.items, sizeof(Expr*) * (sexpr->sexpr.count + 1)); + + sexpr->sexpr.items[sexpr->sexpr.count++] = item; +} + +static Expr* parse_expr(Parser* parser); + +static Expr* parse_list(Parser* parser) +{ + parser_expect(parser, TOKEN_LPAREN); + parser_advance(parser); + + Expr* sexpr = make_sexpr(); + + while (parser->current.type != TOKEN_RPAREN) { + Expr* expr = parse_expr(parser); + sexpr_push(sexpr, expr); + } + + parser_expect(parser, TOKEN_RPAREN); + parser_advance(parser); + + return sexpr; +} + +static Expr* parse_expr(Parser* parser) +{ + switch (parser->current.type) { + case TOKEN_IDENT: { + char* text = strdup(parser->current.text); + + parser_advance(parser); + + return make_atom_expr(EXPR_IDENT, text); + } + + case TOKEN_INT: { + char* text = strdup(parser->current.text); + + parser_advance(parser); + + return make_atom_expr(EXPR_INT, text); + } + + case TOKEN_LPAREN: + return parse_list(parser); + + default: + fprintf(stderr, 
"Unexpected token in expression\n"); + exit(EXIT_FAILURE); + } +} + +/* ========================= + Public API + ========================= */ + +Expr* parse(const char* source) +{ + Parser parser = { + .lexer = { + .input = source, + .pos = 0, + }, + .current = {0}, + }; + + parser.current = next_token(&parser.lexer); + + Expr* expr = parse_expr(&parser); + + if (parser.current.type != TOKEN_EOF) { + fprintf(stderr, "Expected EOF\n"); + exit(EXIT_FAILURE); + } + + free(parser.current.text); + + return expr; +} + +/* ========================= + Debug Printing + ========================= */ + +static void print_sexpr(Expr* expr) +{ + printf("SExpr [ "); + + for (size_t i = 0; i < expr->sexpr.count; i++) { + expr_print(expr->sexpr.items[i]); + + if (i + 1 < expr->sexpr.count) { + printf(", "); + } + } + + printf(" ]"); +} + +void expr_print(Expr* expr) +{ + switch (expr->type) { + case EXPR_IDENT: + printf("Ident(\"%s\")", expr->text); + break; + + case EXPR_INT: + printf("Int(%s)", expr->text); + break; + + case EXPR_SEXPR: + print_sexpr(expr); + break; + } +} + +/* ========================= + Memory Cleanup + ========================= */ + +void expr_free(Expr* expr) +{ + switch (expr->type) { + case EXPR_IDENT: + case EXPR_INT: + free(expr->text); + break; + + case EXPR_SEXPR: + for (size_t i = 0; i < expr->sexpr.count; i++) { + expr_free(expr->sexpr.items[i]); + } + + free(expr->sexpr.items); + break; + } + + free(expr); +} + +/* ========================= + Unit Tests + ========================= */ + +static void test_simple_list(void) +{ + Expr* expr = parse("(add 2)"); + + assert(expr->type == EXPR_SEXPR); + assert(expr->sexpr.count == 2); + + assert(expr->sexpr.items[0]->type == EXPR_IDENT); + assert(strcmp(expr->sexpr.items[0]->text, "add") == 0); + + assert(expr->sexpr.items[1]->type == EXPR_INT); + assert(strcmp(expr->sexpr.items[1]->text, "2") == 0); + + expr_free(expr); +} + +static void test_nested_list(void) +{ + Expr* expr = parse("(add 2 (mul 3 4))"); 
+ + assert(expr->type == EXPR_SEXPR); + assert(expr->sexpr.count == 3); + + Expr* nested = expr->sexpr.items[2]; + + assert(nested->type == EXPR_SEXPR); + assert(nested->sexpr.count == 3); + + assert(strcmp(nested->sexpr.items[0]->text, "mul") == 0); + assert(strcmp(nested->sexpr.items[1]->text, "3") == 0); + assert(strcmp(nested->sexpr.items[2]->text, "4") == 0); + + expr_free(expr); +} + +void test_parse(void) +{ + test_simple_list(); + test_nested_list(); +} diff --git a/parse.h b/parse.h new file mode 100644 index 0000000..1cc0e49 --- /dev/null +++ b/parse.h @@ -0,0 +1,33 @@ +#ifndef PARSE_H +#define PARSE_H + +#include + +typedef enum { + EXPR_IDENT, + EXPR_INT, + EXPR_SEXPR, +} ExprType; + +typedef struct Expr Expr; + +struct Expr { + ExprType type; + + union { + char* text; + + struct { + Expr** items; + size_t count; + } sexpr; + }; +}; + +Expr* parse(const char* source); +void expr_free(Expr* expr); +void expr_print(Expr* expr); + +void test_parse(void); + +#endif diff --git a/test.lisp b/test.lisp new file mode 100644 index 0000000..2158b37 --- /dev/null +++ b/test.lisp @@ -0,0 +1,2 @@ + +(add (mul 4 6) (mul (add 3 4) (add 5 6)))