This commit is contained in:
sfja 2026-05-21 00:31:25 +02:00
commit e2c5f1c66e
16 changed files with 1773 additions and 0 deletions

14
.clang-format Normal file
View File

@ -0,0 +1,14 @@
Language: Cpp
BasedOnStyle: WebKit
IndentWidth: 4
ColumnLimit: 80
IndentCaseLabels: true
InsertNewlineAtEOF: true
AllowShortFunctionsOnASingleLine: None
BinPackArguments: false
AllowAllArgumentsOnNextLine: true
BinPackParameters: false
AllowAllParametersOfDeclarationOnNextLine: true

2
.gitignore vendored Normal file
View File

@ -0,0 +1,2 @@
build/
.vscode/

25
Makefile Normal file
View File

@ -0,0 +1,25 @@
# Toolchain and flags: C23, strict warnings treated as errors where pedantic,
# debug info, and AddressSanitizer enabled for every build.
CC = gcc
CFLAGS = -std=c23 \
-Wall \
-Wextra \
-pedantic-errors \
-g \
-fsanitize=address
# All build products are written below build/.
BUILD_DIR = build
TARGET = $(BUILD_DIR)/main
# NOTE: only the .c files are prerequisites; editing a header alone does not
# trigger a rebuild.
SRC = main.c parse.c ir.c arena.c codegen_x86.c jit_x86.c
# Default goal: build the single executable.
all: $(TARGET)
# Creates the output directory on demand.
$(BUILD_DIR):
mkdir -p $(BUILD_DIR)
# Compiles and links every source in one compiler invocation. $(BUILD_DIR) is
# an order-only prerequisite, so its timestamp never forces a rebuild.
$(TARGET): $(SRC) | $(BUILD_DIR)
$(CC) $(CFLAGS) $(SRC) -o $(TARGET)
# Removes the whole build directory.
clean:
rm -rf $(BUILD_DIR)
.PHONY: all clean

43
arena.c Normal file
View File

@ -0,0 +1,43 @@
#include "arena.h"
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Size in bytes of the first chunk an arena allocates, and the alignment
// every allocation size is rounded up to.
enum {
    ARENA_FIRST_CHUNK = 1024,
    ARENA_ALIGN = 8
};

// Link record stored at the very start of each chunk. Chunks form a singly
// linked list running from the newest chunk back to the oldest.
typedef struct ArenaChunk {
    struct ArenaChunk* prev;
} ArenaChunk;

// Rounds `n` up to the next multiple of ARENA_ALIGN. The caller guarantees
// that n + ARENA_ALIGN - 1 does not overflow.
static size_t arena_round_up(size_t n)
{
    return (n + (ARENA_ALIGN - 1)) & ~(size_t)(ARENA_ALIGN - 1);
}

// Puts `a` into the empty state: no chunk, nothing used. Must be called
// before the first arena_alloc(); it does not release anything.
void arena_init(Arena* a)
{
    a->data = NULL;
    a->size = 0;
    a->capacity = 0;
}

// Allocates `size` bytes (rounded up to a multiple of 8) from the arena.
//
// Memory is carved out of malloc'd chunks that are never moved or resized, so
// a pointer returned here stays valid until arena_free(). (Growing a single
// buffer with realloc would relocate it and leave every earlier pointer
// dangling.) When the current chunk is too small a new one, at least twice
// as large, is started; the unused tail of the old chunk is simply abandoned.
//
// Inside the Arena struct `data` is the newest chunk, `size` the number of
// bytes used in it (including the link record) and `capacity` its total size.
//
// Returns NULL if the request overflows size_t or malloc fails; the arena is
// left unchanged in that case.
void* arena_alloc(Arena* a, size_t size)
{
    if (size > SIZE_MAX - (ARENA_ALIGN - 1))
        return NULL;
    size = arena_round_up(size);

    // Invariant: a->size <= a->capacity, so the subtraction cannot wrap.
    if (a->data == NULL || size > a->capacity - a->size) {
        const size_t header = arena_round_up(sizeof(ArenaChunk));
        if (size > SIZE_MAX - header)
            return NULL;
        const size_t needed = header + size;

        size_t new_cap = ARENA_FIRST_CHUNK;
        if (a->capacity != 0)
            new_cap = a->capacity > SIZE_MAX / 2 ? SIZE_MAX : a->capacity * 2;
        while (new_cap < needed)
            new_cap = new_cap > SIZE_MAX / 2 ? needed : new_cap * 2;

        ArenaChunk* chunk = malloc(new_cap);
        if (!chunk)
            return NULL;
        chunk->prev = (void*)a->data;

        a->data = (uint8_t*)chunk;
        a->size = header;
        a->capacity = new_cap;
    }

    void* ptr = a->data + a->size;
    a->size += size;
    return ptr;
}

// Releases every chunk owned by the arena and resets it to the empty state,
// so it may be reused or freed again safely.
void arena_free(Arena* a)
{
    ArenaChunk* chunk = (void*)a->data;
    while (chunk) {
        ArenaChunk* prev = chunk->prev;
        free(chunk);
        chunk = prev;
    }
    arena_init(a);
}

18
arena.h Normal file
View File

@ -0,0 +1,18 @@
// Simple bump allocator: individual allocations are never freed on their own,
// everything is released together by arena_free().
#ifndef ARENA_H
#define ARENA_H
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
// Allocator state. The fields are maintained by the arena_* functions in
// arena.c; the other modules in this project only pass the struct to those
// functions and never read the fields directly.
typedef struct {
uint8_t* data;
size_t size;
size_t capacity;
} Arena;
// Resets `a` to the empty state without releasing anything. Call before first
// use.
void arena_init(Arena* a);
// Returns storage for `size` bytes (the size is rounded up to a multiple of
// 8), or NULL when memory cannot be obtained.
void* arena_alloc(Arena* a, size_t size);
// Releases all memory owned by `a` and resets it to the empty state.
void arena_free(Arena* a);
#endif

517
codegen_x86.c Normal file
View File

@ -0,0 +1,517 @@
#include "codegen_x86.h"
#include "ir.h"
#include <assert.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
// Initializes `b` as an empty code-generation block with room for 64
// instruction pointers. Every field is given a defined value so the block can
// be passed to any other cg_* function (including cg_block_free) right away.
// If the initial array cannot be allocated the block starts with capacity 0.
void cg_block_init(CgBlock* b)
{
    arena_init(&b->arena);
    b->count = 0;
    b->capacity = 64;
    b->insts = arena_alloc(&b->arena, b->capacity * sizeof(CgInst*));
    if (!b->insts)
        b->capacity = 0;
    b->next_vreg = 1;
    b->vreg_map = NULL;
    b->vreg_map_size = 0;
    b->result_vreg = 0;
}
// Releases the instruction arena and the vreg map of `b`. All pointers are
// cleared afterwards, so calling this twice (or reusing the block after
// cg_block_init) is safe.
void cg_block_free(CgBlock* b)
{
    arena_free(&b->arena);
    b->insts = NULL; // lived inside the arena that was just released
    b->count = 0;
    b->capacity = 0;

    free(b->vreg_map); // free(NULL) is a no-op, no guard needed
    b->vreg_map = NULL;
    b->vreg_map_size = 0;
}
// Returns the lower-case assembler name of `r`, or "unknown" for any value
// outside the PhysReg enumerators.
static const char* phys_reg_name(PhysReg r)
{
    static const char* const names[REG_COUNT] = {
        [RAX] = "rax",
        [RDX] = "rdx",
        [RCX] = "rcx",
        [RBX] = "rbx",
        [RSI] = "rsi",
        [RDI] = "rdi",
        [R8] = "r8",
        [R9] = "r9",
    };
    // The unsigned comparison also rejects negative values.
    if ((unsigned)r >= (unsigned)REG_COUNT)
        return "unknown";
    return names[r];
}
// Writes a virtual register to stdout as "v<number>".
static void print_vreg(VReg v)
{
    putchar('v');
    printf("%u", v);
}
// Dumps the block to stdout with operands shown as virtual registers, one
// numbered line per instruction, framed by a header and a footer line.
void cg_block_print_vreg(const CgBlock* block)
{
    printf("=== CgBlock (VREG view, %zu insts) ===\n", block->count);

    for (size_t idx = 0; idx < block->count; idx++) {
        const CgInst* cur = block->insts[idx];
        printf("%zu: ", idx);

        switch (cur->op) {
            case CG_IMM64:
                print_vreg(cur->dst);
                printf(" = IMM64 %llu\n", (unsigned long long)cur->imm);
                break;
            case CG_ADD8:
            case CG_SUB8:
            case CG_MUL8: {
                // The three binary ops differ only in their mnemonic.
                const char* mnemonic = cur->op == CG_ADD8 ? "ADD8"
                    : cur->op == CG_SUB8                  ? "SUB8"
                                                          : "MUL8";
                print_vreg(cur->dst);
                printf(" = %s ", mnemonic);
                print_vreg(cur->binop.lhs);
                printf(", ");
                print_vreg(cur->binop.rhs);
                printf("\n");
                break;
            }
            default:
                printf("???\n");
                break;
        }
    }

    printf("=====================================\n");
}
void cg_block_print_phys(const CgBlock* block)
{
printf("=== CgBlock (PHYS REG view, %zu insts) ===\n", block->count);
for (size_t i = 0; i < block->count; i++) {
CgInst* inst = block->insts[i];
printf("%zu: ", i);
switch (inst->op) {
case CG_IMM64:
printf("%s = IMM64 %llu\n",
phys_reg_name(block->vreg_map[inst->dst].reg),
(unsigned long long)inst->imm);
break;
case CG_ADD8:
printf("%s = ADD8 %s, %s\n",
phys_reg_name(block->vreg_map[inst->dst].reg),
phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
break;
case CG_SUB8:
printf("%s = SUB8 %s, %s\n",
phys_reg_name(block->vreg_map[inst->dst].reg),
phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
break;
case CG_MUL8:
printf("%s = MUL8 %s, %s\n",
phys_reg_name(block->vreg_map[inst->dst].reg),
phys_reg_name(block->vreg_map[inst->binop.lhs].reg),
phys_reg_name(block->vreg_map[inst->binop.rhs].reg));
break;
default:
printf("? = UNKNOWN OP %d\n", inst->op);
break;
}
}
printf("========================================\n");
}
// Doubles the capacity of the instruction-pointer array (or creates a
// 64-entry array when the block has none yet) and copies the existing
// pointers across. The old array stays in the arena until cg_block_free().
//
// NOTE: the old array and the instructions it points to live in the same
// arena, so this relies on arena_alloc() leaving earlier allocations at
// stable addresses.
//
// Aborts with a message when the new array cannot be allocated, because the
// caller has no way to continue without it.
static void cg_grow(CgBlock* b)
{
    const size_t max_entries = SIZE_MAX / sizeof(CgInst*);
    CgInst** new_arr = NULL;
    size_t new_cap = 64;

    if (b->capacity != 0)
        new_cap = b->capacity > max_entries / 2 ? 0 : b->capacity * 2;
    if (new_cap != 0)
        new_arr = arena_alloc(&b->arena, new_cap * sizeof(CgInst*));
    if (!new_arr) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }

    if (b->count > 0)
        memcpy(new_arr, b->insts, b->count * sizeof(CgInst*));
    b->insts = new_arr;
    b->capacity = new_cap;
}
// Appends a new, zero-initialized instruction to the block and returns it for
// the caller to fill in. Never returns NULL: running out of memory aborts with
// a message, since callers write through the result unconditionally.
static CgInst* cg_emit(CgBlock* b)
{
    if (b->count == b->capacity) {
        cg_grow(b);
    }
    CgInst* inst = arena_alloc(&b->arena, sizeof(CgInst));
    if (!inst) {
        fprintf(stderr, "Out of memory\n");
        abort();
    }
    // Start from a defined state so fields a caller leaves alone are zero.
    memset(inst, 0, sizeof *inst);
    b->insts[b->count++] = inst;
    return inst;
}
// Instruction selection: translates every IR instruction of `ir` into one
// CgInst appended to `cg`, keeping the IR's virtual register numbers.
//
// `cg` must have been initialized with cg_block_init(). Any vreg map left over
// from an earlier run is released and a fresh, zeroed map with one entry per
// IR vreg is installed. cg->result_vreg ends up as the vreg of the last IR
// instruction (0 for an empty block). IR opcodes without a translation emit
// nothing. Aborts with a message if the map cannot be allocated.
void ir_block_isel_x86(CgBlock* cg, const IrBlock* ir)
{
    cg->count = 0;
    cg->next_vreg = 1;
    cg->result_vreg = 0;

    // Drop a map from a previous selection run instead of leaking it.
    free(cg->vreg_map);
    cg->vreg_map = NULL;
    cg->vreg_map_size = ir->next_vreg;
    if (cg->vreg_map_size > 0) {
        cg->vreg_map = calloc(cg->vreg_map_size, sizeof(RegMap));
        if (!cg->vreg_map) {
            fprintf(stderr, "Out of memory\n");
            abort();
        }
    }

    for (size_t i = 0; i < ir->count; i++) {
        const IrInst* inst = ir->insts[i];

        switch (inst->op) {
            case OP_INT: {
                CgInst* out = cg_emit(cg);
                out->op = CG_IMM64;
                out->dst = inst->vreg;
                out->imm = inst->value;
                break;
            }
            case OP_ADD:
            case OP_SUB:
            case OP_MUL: {
                // All binary operators share one shape; only the opcode
                // differs.
                CgInst* out = cg_emit(cg);
                out->op = inst->op == OP_ADD ? CG_ADD8
                    : inst->op == OP_SUB     ? CG_SUB8
                                             : CG_MUL8;
                out->dst = inst->vreg;
                out->binop.lhs = inst->operands[0]->vreg;
                out->binop.rhs = inst->operands[1]->vreg;
                break;
            }
            default:
                break;
        }

        cg->result_vreg = inst->vreg;
    }
}
static PhysReg phys_alloc(VReg vreg, RegMap* map, size_t size)
{
// 1. reuse if already assigned
if (vreg < size && map[vreg].assigned)
return map[vreg].reg;
// 2. find free register
static PhysReg next = RAX;
for (int i = 0; i < REG_COUNT; i++) {
PhysReg r = (next + i) % REG_COUNT;
int used = 0;
for (size_t j = 0; j < size; j++) {
if (map[j].assigned && map[j].reg == r) {
used = 1;
break;
}
}
if (!used) {
next = (r + 1) % REG_COUNT;
return r;
}
}
// fallback (no spilling implemented yet)
return RAX;
}
void cg_block_regalloc_x86(CgBlock* block)
{
size_t n = block->vreg_map_size;
for (size_t i = 0; i < n; i++) {
block->vreg_map[i].assigned = 0;
}
for (size_t i = 0; i < block->count; i++) {
CgInst* inst = block->insts[i];
phys_alloc(inst->dst, block->vreg_map, n);
if (inst->op == CG_ADD8 || inst->op == CG_SUB8 || inst->op == CG_MUL8) {
phys_alloc(inst->binop.lhs, block->vreg_map, n);
phys_alloc(inst->binop.rhs, block->vreg_map, n);
}
}
// enforce result vreg → RAX
VReg r = block->result_vreg;
if (r < n) {
block->vreg_map[r].reg = RAX;
block->vreg_map[r].assigned = 1;
}
}
static void test_ir_block_isel_add(void)
{
IrBlock ir;
ir_block_init(&ir);
// IR: 2 + 3
Expr two = { .type = EXPR_INT, .text = "2" };
Expr three = { .type = EXPR_INT, .text = "3" };
Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
Expr sexpr = { .type = EXPR_SEXPR,
.sexpr
= { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } };
IrInst* result = ir_lower_expr(&ir, &sexpr);
assert(result != NULL);
CgBlock cg;
cg_block_init(&cg);
ir_block_isel_x86(&cg, &ir);
// Expect: IMM 2, IMM 3, ADD
assert(cg.count == 3);
// 1. load 2
assert(cg.insts[0]->op == CG_IMM64);
assert(cg.insts[0]->imm == 2);
// 2. load 3
assert(cg.insts[1]->op == CG_IMM64);
assert(cg.insts[1]->imm == 3);
// 3. add
assert(cg.insts[2]->op == CG_ADD8);
// operands should reference IR vregs (not recomputed)
assert(cg.insts[2]->binop.lhs == ir.insts[0]->vreg);
assert(cg.insts[2]->binop.rhs == ir.insts[1]->vreg);
cg_block_free(&cg);
ir_block_free(&ir);
}
// Instruction selection for (add 2 (mul 3 4)) must yield three IMM64s, then
// MUL8 over 3 and 4, then ADD8 over 2 and the MUL result.
static void test_ir_block_isel_nested(void)
{
    Expr lit2 = { .type = EXPR_INT, .text = "2" };
    Expr lit3 = { .type = EXPR_INT, .text = "3" };
    Expr lit4 = { .type = EXPR_INT, .text = "4" };
    Expr add_name = { .type = EXPR_IDENT, .text = "add" };
    Expr mul_name = { .type = EXPR_IDENT, .text = "mul" };

    Expr* inner_items[] = { &mul_name, &lit3, &lit4 };
    Expr inner = { .type = EXPR_SEXPR,
        .sexpr = { .items = inner_items, .count = 3 } };
    Expr* outer_items[] = { &add_name, &lit2, &inner };
    Expr outer = { .type = EXPR_SEXPR,
        .sexpr = { .items = outer_items, .count = 3 } };

    IrBlock ir;
    ir_block_init(&ir);
    IrInst* lowered = ir_lower_expr(&ir, &outer);
    assert(lowered != NULL);

    CgBlock cg;
    cg_block_init(&cg);
    ir_block_isel_x86(&cg, &ir);

    // int 2, int 3, int 4, mul, add
    assert(cg.count == 5);

    const uint64_t expected_imm[] = { 2, 3, 4 };
    for (size_t i = 0; i < 3; i++) {
        assert(cg.insts[i]->op == CG_IMM64);
        assert(cg.insts[i]->imm == expected_imm[i]);
    }

    const CgInst* mul = cg.insts[3];
    const CgInst* add = cg.insts[4];
    assert(mul->op == CG_MUL8);
    assert(add->op == CG_ADD8);

    // mul reads 3 and 4
    assert(mul->binop.lhs == ir.insts[1]->vreg);
    assert(mul->binop.rhs == ir.insts[2]->vreg);

    // add reads 2 and the mul result
    assert(add->binop.lhs == ir.insts[0]->vreg);
    assert(add->binop.rhs == ir.insts[3]->vreg);

    cg_block_free(&cg);
    ir_block_free(&ir);
}
static void test_cg_block_regalloc_add(void)
{
// Build IR: 2 + 3
IrBlock ir;
ir_block_init(&ir);
Expr two = { .type = EXPR_INT, .text = "2" };
Expr three = { .type = EXPR_INT, .text = "3" };
Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
Expr sexpr = { .type = EXPR_SEXPR,
.sexpr
= { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } };
IrInst* result = ir_lower_expr(&ir, &sexpr);
assert(result != NULL);
CgBlock cg;
cg_block_init(&cg);
ir_block_isel_x86(&cg, &ir);
cg_block_regalloc_x86(&cg);
// After RA: ADD must use physical registers, not vregs
CgInst* add = cg.insts[2];
assert(add->op == CG_ADD8);
PhysReg lhs = add->binop.lhs;
PhysReg rhs = add->binop.rhs;
PhysReg dst = add->dst;
// sanity: registers must be in valid range
assert(lhs < REG_COUNT);
assert(rhs < REG_COUNT);
assert(dst < REG_COUNT);
// lhs and rhs should not equal invalid sentinel values
assert(lhs != (PhysReg)-1);
assert(rhs != (PhysReg)-1);
cg_block_free(&cg);
ir_block_free(&ir);
}
static void test_cg_block_regalloc_nested(void)
{
// IR: 2 + (3 * 4)
IrBlock ir;
ir_block_init(&ir);
Expr two = { .type = EXPR_INT, .text = "2" };
Expr three = { .type = EXPR_INT, .text = "3" };
Expr four = { .type = EXPR_INT, .text = "4" };
Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
Expr mul_ident = { .type = EXPR_IDENT, .text = "mul" };
Expr mul_expr = { .type = EXPR_SEXPR,
.sexpr
= { .items = (Expr*[]) { &mul_ident, &three, &four }, .count = 3 } };
Expr add_expr = { .type = EXPR_SEXPR,
.sexpr
= { .items = (Expr*[]) { &add_ident, &two, &mul_expr }, .count = 3 } };
IrInst* result = ir_lower_expr(&ir, &add_expr);
assert(result != NULL);
CgBlock cg;
cg_block_init(&cg);
ir_block_isel_x86(&cg, &ir);
cg_block_regalloc_x86(&cg);
// ADD instruction is last
CgInst* add = cg.insts[4];
assert(add->op == CG_ADD8);
// MUL instruction
CgInst* mul = cg.insts[3];
assert(mul->op == CG_MUL8);
// Check register validity
assert(add->binop.lhs < REG_COUNT);
assert(add->binop.rhs < REG_COUNT);
assert(add->dst < REG_COUNT);
assert(mul->binop.lhs < REG_COUNT);
assert(mul->binop.rhs < REG_COUNT);
assert(mul->dst < REG_COUNT);
// Critical correctness check:
// MUL result should be used by ADD
assert(add->binop.rhs == mul->dst);
cg_block_free(&cg);
ir_block_free(&ir);
}
void test_codegen_x86(void)
{
test_ir_block_isel_add();
test_ir_block_isel_nested();
test_cg_block_regalloc_add();
test_cg_block_regalloc_nested();
}

73
codegen_x86.h Normal file
View File

@ -0,0 +1,73 @@
#ifndef CODEGEN_X86_H
#define CODEGEN_X86_H
#include "arena.h"
#include "ir.h"
#include <stddef.h>
#include <stdint.h>
// Opcodes of the x86 code-generation IR (CgInst.op).
// NOTE(review): the "8" suffix presumably means an 8-byte operand - confirm.
typedef enum {
CG_MOV, // not produced by ir_block_isel_x86 in codegen_x86.c
CG_ADD8, // dst = binop.lhs + binop.rhs
CG_SUB8, // dst = binop.lhs - binop.rhs
CG_MUL8, // dst = binop.lhs * binop.rhs
CG_IMM64 // dst = imm
} CgOpCode;
// Physical registers the code generator knows about. The enumerator values
// are plain indices, NOT hardware register numbers (RDX is 1 here but is
// encoded as 2); reg_enc() in jit_x86.c performs the translation.
typedef enum {
RAX = 0,
RDX,
RCX,
RBX,
RSI,
RDI,
R8,
R9,
REG_COUNT // number of registers above; not a register itself
} PhysReg;
// One entry of CgBlock.vreg_map, indexed by virtual register number.
typedef struct {
PhysReg reg; // physical register recorded for the vreg
int assigned; // non-zero once the register allocator has fixed `reg`
} RegMap;
// One instruction of the code-generation IR. Which union member is valid
// depends on `op`.
typedef struct CgInst {
CgOpCode op;
VReg dst; // virtual register that receives the result
union {
struct {
VReg lhs;
VReg rhs;
} binop; // operands of CG_ADD8 / CG_SUB8 / CG_MUL8
uint64_t imm; // constant loaded by CG_IMM64
};
} CgInst;
// A straight-line sequence of CgInst plus the register-allocation result.
// Initialize with cg_block_init() and release with cg_block_free().
typedef struct {
CgInst** insts; // instruction pointers; array and instructions come from `arena`
size_t count; // number of valid entries in `insts`
size_t capacity; // number of entries `insts` has room for
VReg next_vreg; // set to 1 by cg_block_init / ir_block_isel_x86; not read elsewhere in codegen_x86.c
RegMap* vreg_map; // calloc'd by ir_block_isel_x86, freed by cg_block_free; NULL before selection
size_t vreg_map_size; // number of entries in `vreg_map`
Arena arena; // backing storage for `insts` and every CgInst
VReg result_vreg; // vreg of the last lowered instruction, i.e. the block's value
} CgBlock;
// Prepares an empty block. Must be called before any other cg_* function.
void cg_block_init(CgBlock* b);
// Releases the block's arena and its vreg map.
void cg_block_free(CgBlock* b);
// Prints the block to stdout with operands shown as virtual registers.
void cg_block_print_vreg(const CgBlock* block);
// Prints the block to stdout with operands shown as the physical registers
// stored in vreg_map; the map must exist (run ir_block_isel_x86 first).
void cg_block_print_phys(const CgBlock* block);
// Instruction selection: fills `cg` from `ir` and allocates cg->vreg_map.
void ir_block_isel_x86(CgBlock* cg, const IrBlock* ir);
// Register allocation: writes its result into block->vreg_map.
void cg_block_regalloc_x86(CgBlock* block);
// Runs the unit tests of codegen_x86.c (assert-based).
void test_codegen_x86(void);
#endif

6
compile_flags.txt Normal file
View File

@ -0,0 +1,6 @@
-xc
-std=c23
-Wall
-Wextra
-pedantic-errors
-Wno-empty-translation-unit

328
ir.c Normal file
View File

@ -0,0 +1,328 @@
#include "ir.h"
#include "arena.h"
#include "parse.h"
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Puts `block` into the empty state: no instructions, no storage, vreg
// numbering starting at 0. Must be called before the block is used.
void ir_block_init(IrBlock* block)
{
    // Instruction list
    block->insts = NULL;
    block->capacity = 0;
    block->count = 0;

    // Numbering and backing storage
    block->next_vreg = 0;
    arena_init(&block->arena);
}
// Releases all instructions and the instruction list of `block`. Accepts
// NULL. The block is left in the empty state, so freeing it a second time or
// reusing it is safe.
void ir_block_free(IrBlock* block)
{
    if (!block)
        return;
    arena_free(&block->arena);
    free(block->insts);
    // Clear the now-dangling pointer and its bookkeeping.
    block->insts = NULL;
    block->count = 0;
    block->capacity = 0;
    block->next_vreg = 0;
}
// Linear search for `inst` in the block's instruction list.
// Returns its position, or -1 when the instruction is not in the block.
static int ir_inst_index(IrBlock* block, IrInst* inst)
{
    size_t pos = 0;
    while (pos < block->count && block->insts[pos] != inst) {
        pos++;
    }
    return pos < block->count ? (int)pos : -1;
}
// Prints the block to stdout, one "%<index> = <Op> <operands>" line per
// instruction. Operands are shown as the index of the instruction that
// produces them, or "<?>" if that instruction is not in the block.
// Does nothing for a NULL block.
void ir_block_print(IrBlock* block)
{
    if (block == NULL)
        return;

    for (size_t idx = 0; idx < block->count; idx++) {
        const IrInst* cur = block->insts[idx];
        printf("%%%zu = ", idx);

        if (cur->op == OP_INT) {
            printf("Int %llu\n", (unsigned long long)cur->value);
            continue;
        }

        const char* mnemonic = NULL;
        switch (cur->op) {
            case OP_ADD:
                mnemonic = "Add";
                break;
            case OP_SUB:
                mnemonic = "Sub";
                break;
            case OP_MUL:
                mnemonic = "Mul";
                break;
            default:
                break;
        }
        if (mnemonic == NULL) {
            printf("UnknownOp\n");
            continue;
        }

        printf("%s ", mnemonic);
        for (size_t k = 0; k < cur->operand_count; k++) {
            if (k > 0)
                printf(", ");
            const int ref = ir_inst_index(block, cur->operands[k]);
            if (ref >= 0) {
                printf("%%%d", ref);
            } else {
                printf("<?>");
            }
        }
        printf("\n");
    }
}
// Appends `inst` to the block's instruction list, growing the list when it
// is full. Returns false (and leaves the block unchanged) if either argument
// is NULL or the list cannot be grown.
static bool ir_block_emit(IrBlock* block, IrInst* inst)
{
    if (!block || !inst)
        return false;
    if (block->count == block->capacity) {
        size_t new_cap = block->capacity ? block->capacity * 2 : 8;
        // Reject a capacity that wrapped around or whose byte size would.
        if (new_cap <= block->capacity
            || new_cap > SIZE_MAX / sizeof(IrInst*))
            return false;
        IrInst** new_buf = realloc(block->insts, new_cap * sizeof(IrInst*));
        if (!new_buf)
            return false;
        block->insts = new_buf;
        block->capacity = new_cap;
    }
    block->insts[block->count++] = inst;
    return true;
}

// Allocates a zero-filled instruction from the block's arena.
// Returns NULL when the arena cannot supply the memory.
static IrInst* ir_alloc_inst(IrBlock* block)
{
    IrInst* inst = arena_alloc(&block->arena, sizeof(IrInst));
    if (!inst)
        return NULL;
    memset(inst, 0, sizeof(IrInst));
    return inst;
}

// Creates an OP_INT instruction holding `value`, appends it to the block and
// returns it. Returns NULL on allocation failure; in that case the block
// gains no instruction and no vreg number is consumed.
static IrInst* ir_new_int(IrBlock* block, uint64_t value)
{
    IrInst* inst = ir_alloc_inst(block);
    if (!inst)
        return NULL;
    inst->op = OP_INT;
    inst->value = value;
    inst->vreg = block->next_vreg;
    if (!ir_block_emit(block, inst))
        return NULL;
    // Consume the vreg number only once the instruction is really emitted.
    block->next_vreg++;
    return inst;
}

// Creates the binary instruction `a op b`, appends it to the block and
// returns it. Returns NULL on allocation failure; in that case the block
// gains no instruction and no vreg number is consumed.
static IrInst* ir_new_binop(IrBlock* block, OpCode op, IrInst* a, IrInst* b)
{
    IrInst* inst = ir_alloc_inst(block);
    if (!inst)
        return NULL;
    inst->op = op;
    inst->operand_count = 2;
    inst->operands[0] = a;
    inst->operands[1] = b;
    inst->vreg = block->next_vreg;
    if (!ir_block_emit(block, inst))
        return NULL;
    block->next_vreg++;
    return inst;
}
// Maps an operator name ("add", "sub", "mul") to its opcode. The match is
// exact and case-sensitive; any other name yields (OpCode)-1.
// `s` must not be NULL.
static OpCode op_from_ident_strict(const char* s)
{
    static const struct {
        const char* name;
        OpCode op;
    } known[] = {
        { "add", OP_ADD },
        { "sub", OP_SUB },
        { "mul", OP_MUL },
    };

    for (size_t i = 0; i < sizeof known / sizeof known[0]; i++) {
        if (strcmp(s, known[i].name) == 0)
            return known[i].op;
    }
    return (OpCode)-1;
}
// Parses `s` as an unsigned decimal integer.
//
// Accepts only a non-empty string made up entirely of the digits 0-9: no
// sign, no whitespace, no prefix. Values that do not fit in 64 bits are
// rejected instead of being clamped.
//
// Returns true and stores the value in *out on success; returns false and
// leaves *out untouched otherwise (including for a NULL or empty string).
static bool is_int_literal(const char* s, uint64_t* out)
{
    if (!s || !*s)
        return false;

    uint64_t value = 0;
    for (const char* p = s; *p != '\0'; p++) {
        if (*p < '0' || *p > '9')
            return false;
        const uint64_t digit = (uint64_t)(*p - '0');
        // value * 10 + digit would exceed UINT64_MAX
        if (value > (UINT64_MAX - digit) / 10)
            return false;
        value = value * 10 + digit;
    }

    *out = value;
    return true;
}
// Lowers the AST `expr` into IR instructions appended to `block` and returns
// the instruction that produces the expression's value.
//
//  - An integer literal becomes one OP_INT instruction.
//  - (op a b c ...) with op in {add, sub, mul} is lowered left-associatively:
//    (add a b c) => ((a + b) + c). At least 2 and at most IR_MAX_ARITY
//    arguments are accepted.
//  - Bare identifiers are not supported.
//
// Returns NULL for NULL arguments, malformed or unsupported input and on
// allocation failure. Instructions emitted before the failure was detected
// stay in the block.
IrInst* ir_lower_expr(IrBlock* block, const Expr* expr)
{
    if (!expr || !block)
        return NULL;

    switch (expr->type) {
        case EXPR_INT: {
            uint64_t value;
            if (!is_int_literal(expr->text, &value)) {
                return NULL; // strict: invalid integer literal
            }
            return ir_new_int(block, value);
        }
        case EXPR_IDENT:
            // identifiers not supported in this IR
            return NULL;
        case EXPR_SEXPR: {
            size_t n = expr->sexpr.count;
            if (n == 0) {
                return NULL; // malformed: empty s-expression
            }
            const Expr* head = expr->sexpr.items[0];
            // The head must be an operator name; a missing name must not
            // reach strcmp().
            if (!head || head->type != EXPR_IDENT || !head->text) {
                return NULL;
            }
            OpCode op = op_from_ident_strict(head->text);
            if (op == (OpCode)-1) {
                return NULL; // unknown operator
            }

            size_t argc = n - 1;
            // strict arity rules
            if (argc < 2) {
                return NULL; // e.g. (add x) is invalid
            }
            if (argc > IR_MAX_ARITY) {
                return NULL; // project-defined upper bound on arguments
            }

            IrInst* acc = ir_lower_expr(block, expr->sexpr.items[1]);
            if (!acc)
                return NULL;
            // Fold the remaining arguments into the accumulator one by one.
            for (size_t i = 2; i < n; i++) {
                IrInst* rhs = ir_lower_expr(block, expr->sexpr.items[i]);
                if (!rhs)
                    return NULL;
                acc = ir_new_binop(block, op, acc, rhs);
                if (!acc)
                    return NULL;
            }
            return acc;
        }
        default:
            break;
    }
    return NULL;
}
static void test_ir_lower_simple_add(void)
{
IrBlock block;
ir_block_init(&block);
// Build AST: (add 2 3)
Expr two = { .type = EXPR_INT, .text = "2" };
Expr three = { .type = EXPR_INT, .text = "3" };
Expr add_ident = { .type = EXPR_IDENT, .text = "add" };
Expr sexpr = { .type = EXPR_SEXPR,
.sexpr
= { .items = (Expr*[]) { &add_ident, &two, &three }, .count = 3 } };
IrInst* result = ir_lower_expr(&block, &sexpr);
assert(result != NULL);
// Check instruction count
assert(block.count == 3);
// Int 2
assert(block.insts[0]->op == OP_INT);
assert(block.insts[0]->value == 2);
// Int 3
assert(block.insts[1]->op == OP_INT);
assert(block.insts[1]->value == 3);
// Add
assert(block.insts[2]->op == OP_ADD);
// Operand wiring
assert(block.insts[2]->operands[0] == block.insts[0]);
assert(block.insts[2]->operands[1] == block.insts[1]);
ir_block_free(&block);
}
// Lowering (add 2 (mul 3 4)) must produce Int 2, Int 3, Int 4, Mul, Add with
// the Mul feeding the Add's second operand.
static void test_ir_lower_nested_expr(void)
{
    Expr lit2 = { .type = EXPR_INT, .text = "2" };
    Expr lit3 = { .type = EXPR_INT, .text = "3" };
    Expr lit4 = { .type = EXPR_INT, .text = "4" };
    Expr mul_name = { .type = EXPR_IDENT, .text = "mul" };
    Expr add_name = { .type = EXPR_IDENT, .text = "add" };

    Expr* inner_items[] = { &mul_name, &lit3, &lit4 };
    Expr inner = { .type = EXPR_SEXPR,
        .sexpr = { .items = inner_items, .count = 3 } };
    Expr* outer_items[] = { &add_name, &lit2, &inner };
    Expr outer = { .type = EXPR_SEXPR,
        .sexpr = { .items = outer_items, .count = 3 } };

    IrBlock block;
    ir_block_init(&block);
    IrInst* lowered = ir_lower_expr(&block, &outer);
    assert(lowered != NULL);

    assert(block.count == 5);

    const uint64_t expected[] = { 2, 3, 4 };
    for (size_t i = 0; i < 3; i++) {
        assert(block.insts[i]->op == OP_INT && block.insts[i]->value == expected[i]);
    }

    const IrInst* mul = block.insts[3];
    const IrInst* add = block.insts[4];
    assert(mul->op == OP_MUL);
    assert(add->op == OP_ADD);

    // mul reads 3 and 4
    assert(mul->operands[0] == block.insts[1]);
    assert(mul->operands[1] == block.insts[2]);

    // add reads 2 and the mul result
    assert(add->operands[0] == block.insts[0]);
    assert(add->operands[1] == block.insts[3]);

    ir_block_free(&block);
}
void test_ast_lower(void)
{
test_ir_lower_simple_add();
test_ir_lower_nested_expr();
}

54
ir.h Normal file
View File

@ -0,0 +1,54 @@
#ifndef IR_H
#define IR_H
#include "arena.h"
#include "parse.h"
#include <stddef.h>
#include <stdint.h>
// Virtual register number. ir.c hands them out sequentially per block,
// starting at 0 (see IrBlock.next_vreg).
typedef uint32_t VReg;
// Size of IrInst.operands, and the largest number of arguments
// ir_lower_expr() accepts for one operator.
#define IR_MAX_ARITY 8 // adjust as needed
// IR opcodes. ir.c uses (OpCode)-1 internally as a "no such operator" marker.
typedef enum {
OP_INT, // integer constant, stored in IrInst.value
OP_ADD, // operands[0] + operands[1]
OP_SUB, // operands[0] - operands[1]
OP_MUL // operands[0] * operands[1]
} OpCode;
// One IR instruction. Each instruction defines exactly one virtual register;
// operands refer to the instructions that produce their values.
typedef struct IrInst IrInst;
struct IrInst {
OpCode op;
VReg vreg; // register defined by this instruction
union {
uint64_t value; // OP_INT
struct {
size_t operand_count; // ir.c always sets this to 2 for binary ops
IrInst* operands[IR_MAX_ARITY]; // inline storage
};
};
};
// A straight-line list of IR instructions in emission order.
// Initialize with ir_block_init() and release with ir_block_free().
typedef struct {
Arena arena; // storage for the IrInst objects
VReg next_vreg; // number the next emitted instruction will receive
IrInst** insts; // realloc-grown array of pointers into `arena`
size_t count; // number of valid entries in `insts`
size_t capacity; // number of entries `insts` has room for
} IrBlock;
// Puts `block` into the empty state. Call before first use.
void ir_block_init(IrBlock* block);
// Releases everything owned by `block`. NULL is accepted.
void ir_block_free(IrBlock* block);
// Prints the block to stdout, one instruction per line. NULL is accepted.
void ir_block_print(IrBlock* block);
// Lowers `expr` into `block` and returns the instruction producing its value,
// or NULL when the expression cannot be lowered.
IrInst* ir_lower_expr(IrBlock* block, const Expr* expr);
// Runs the unit tests of ir.c (assert-based).
void test_ast_lower(void);
#endif

188
jit_x86.c Normal file
View File

@ -0,0 +1,188 @@
#include "jit_x86.h"
#include "codegen_x86.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/mman.h>
#include <unistd.h>
// Translates a PhysReg into the x86-64 hardware register number used in
// instruction encodings (0-7 for the legacy registers, 8 and 9 for r8/r9).
// Values outside the PhysReg enumerators map to 0.
static uint8_t reg_enc(PhysReg r)
{
    static const uint8_t hw_number[REG_COUNT] = {
        [RAX] = 0,
        [RCX] = 1,
        [RDX] = 2,
        [RBX] = 3,
        [RSI] = 6,
        [RDI] = 7,
        [R8] = 8,
        [R9] = 9,
    };
    // The unsigned comparison also rejects negative values.
    if ((unsigned)r >= (unsigned)REG_COUNT)
        return 0;
    return hw_number[r];
}
// Fixed-size output buffer that machine code is appended to. The emit*
// helpers abort the program instead of growing it.
typedef struct {
uint8_t* buf; // start of the buffer
size_t cap; // total size of `buf` in bytes
size_t len; // number of bytes written so far
} AsmBuf;
// Appends one byte to the buffer. Prints a message and aborts when the
// buffer is already full.
static void emit8(AsmBuf* a, uint8_t v)
{
    if (a->len < a->cap) {
        a->buf[a->len] = v;
        a->len += 1;
        return;
    }
    fprintf(stderr, "JIT buffer overflow\n");
    abort();
}
// Appends the four bytes of `v` in host byte order. Prints a message and
// aborts unless all four bytes fit into the remaining space.
[[maybe_unused]]
static void emit32(AsmBuf* a, uint32_t v)
{
    // Check room for the whole value, not just for its first byte.
    if (a->len > a->cap || a->cap - a->len < sizeof v) {
        fprintf(stderr, "JIT buffer overflow\n");
        abort();
    }
    memcpy(&a->buf[a->len], &v, sizeof v);
    a->len += sizeof v;
}
// Appends the eight bytes of `v` in host byte order. Prints a message and
// aborts unless all eight bytes fit into the remaining space.
static void emit64(AsmBuf* a, uint64_t v)
{
    // Check room for the whole value, not just for its first byte.
    if (a->len > a->cap || a->cap - a->len < sizeof v) {
        fprintf(stderr, "JIT buffer overflow\n");
        abort();
    }
    memcpy(&a->buf[a->len], &v, sizeof v);
    a->len += sizeof v;
}
// Emits "mov dst, imm64" (REX.W B8+rd io).
// Only the low three bits of the register number go into the opcode byte;
// the fourth bit, set for r8 and r9, is carried by REX.B.
static void emit_mov_imm64(AsmBuf* a, PhysReg dst, uint64_t imm)
{
    const uint8_t r = reg_enc(dst);
    emit8(a, (uint8_t)(0x48 | ((r & 8) ? 0x01 : 0))); // REX.W [+ REX.B]
    emit8(a, (uint8_t)(0xB8 + (r & 7)));
    emit64(a, imm);
}
// Builds a REX prefix from two hardware register numbers: the base value
// 0x40, plus the B bit (0x01) when `dst` needs its fourth bit and the R bit
// (0x04) when `src` does. The W bit is not set here.
static uint8_t rex_enc(uint8_t dst, uint8_t src)
{
    uint8_t prefix = 0x40;
    if (dst & 8)
        prefix |= 0x01; // B
    if (src & 8)
        prefix |= 0x04; // R
    return prefix;
}
// Emits a register-to-register instruction of the form "op r/m, r":
// prefix, opcode byte `op`, then a ModRM byte with mod=11, reg=src, rm=dst.
//
// `rex` is the prefix requested by the caller (0x48 selects 64-bit operand
// size); the R and B bits needed for r8/r9 are merged into it. Dropping the
// caller's prefix would silently turn every operation into a 32-bit one.
static void emit_alu_rr(
    AsmBuf* a, uint8_t rex, uint8_t op, PhysReg dst, PhysReg src)
{
    const uint8_t d = reg_enc(dst);
    const uint8_t s = reg_enc(src);
    emit8(a, (uint8_t)(rex | rex_enc(d, s)));
    emit8(a, op);
    const uint8_t modrm = (uint8_t)(0xC0 | ((s & 7) << 3) | (d & 7));
    emit8(a, modrm);
}
// Emits "add d, s" through emit_alu_rr: opcode 01 /r (ADD r/m, r), requesting
// the REX.W prefix 0x48.
static void emit_add(AsmBuf* a, PhysReg d, PhysReg s)
{
    const uint8_t rex_w = 0x48;
    const uint8_t opcode_add = 0x01;
    emit_alu_rr(a, rex_w, opcode_add, d, s);
}
// Emits "sub d, s" through emit_alu_rr: opcode 29 /r (SUB r/m, r), requesting
// the REX.W prefix 0x48.
static void emit_sub(AsmBuf* a, PhysReg d, PhysReg s)
{
    const uint8_t rex_w = 0x48;
    const uint8_t opcode_sub = 0x29;
    emit_alu_rr(a, rex_w, opcode_sub, d, s);
}
// Emits "imul d, s" (REX.W 0F AF /r), i.e. d = d * s.
//
// For this opcode the DESTINATION sits in ModRM.reg and the source in
// ModRM.rm - the opposite of add/sub. Register numbers are masked to three
// bits; the fourth bit of r8/r9 goes into REX.R (reg) and REX.B (rm).
static void emit_mul(AsmBuf* a, PhysReg d, PhysReg s)
{
    const uint8_t dst = reg_enc(d);
    const uint8_t src = reg_enc(s);
    emit8(a,
        (uint8_t)(0x48 | ((dst & 8) ? 0x04 : 0) | ((src & 8) ? 0x01 : 0)));
    emit8(a, 0x0F);
    emit8(a, 0xAF);
    emit8(a, (uint8_t)(0xC0 | ((dst & 7) << 3) | (src & 7)));
}
// Emits a near return (single byte C3).
static void emit_ret(AsmBuf* a)
{
    const uint8_t opcode_ret = 0xC3;
    emit8(a, opcode_ret);
}
// Rounds `n` up to a multiple of the system page size as reported by
// sysconf(_SC_PAGESIZE). The bit mask assumes the page size is a power of
// two.
static size_t align_page(size_t n)
{
    const size_t page_size = sysconf(_SC_PAGESIZE);
    const size_t low_bits = page_size - 1;
    return (n + low_bits) & ~low_bits;
}
JitFn cg_block_emit_x86_machine_code(CgBlock* block)
{
AsmBuf a = { 0 };
a.cap = align_page(1024);
a.len = 0;
a.buf = mmap(NULL,
a.cap,
PROT_READ | PROT_WRITE,
MAP_PRIVATE | MAP_ANONYMOUS,
-1,
0);
if (a.buf == MAP_FAILED) {
return NULL;
}
for (size_t i = 0; i < block->count; i++) {
CgInst* inst = block->insts[i];
switch (inst->op) {
case CG_IMM64:
emit_mov_imm64(&a, inst->dst, inst->imm);
break;
case CG_ADD8:
emit_add(&a, inst->dst, inst->binop.rhs);
break;
case CG_SUB8:
emit_sub(&a, inst->dst, inst->binop.rhs);
break;
case CG_MUL8:
emit_mul(&a, inst->dst, inst->binop.rhs);
break;
default:
break;
}
}
// ensure result is in rax (ABI return register)
emit_ret(&a);
// make executable
mprotect(a.buf, align_page(a.len), PROT_READ | PROT_EXEC);
for (size_t i = 0; i < a.len; i++) {
printf("%02x ", a.buf[i]);
}
printf("\n");
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wpedantic"
return (JitFn)a.buf;
#pragma GCC diagnostic pop
}

11
jit_x86.h Normal file
View File

@ -0,0 +1,11 @@
#ifndef JIT_X86_H
#define JIT_X86_H
#include "codegen_x86.h"
#include <stdint.h>
// Signature of a JIT-compiled block: no arguments, returns a 64-bit value.
typedef uint64_t (*JitFn)(void);
// Generates executable x86-64 machine code for `block` and returns it as a
// callable function, or NULL if the executable memory cannot be mapped. The
// generated bytes are also printed to stdout in hex. The memory backing the
// returned function is never released by this module.
JitFn cg_block_emit_x86_machine_code(CgBlock* block);
#endif

105
main.c Normal file
View File

@ -0,0 +1,105 @@
#include "codegen_x86.h"
#include "ir.h"
#include "jit_x86.h"
#include "parse.h"
#include <assert.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Reads the whole file `filename` into a freshly malloc'd, NUL-terminated
// buffer that the caller must free(). On any failure a diagnostic is written
// to stderr and NULL is returned.
static char* read_file(const char* filename)
{
    char* contents = NULL;
    long length = -1;

    FILE* fp = fopen(filename, "rb");
    if (fp == NULL) {
        perror("fopen");
        return NULL;
    }

    // Determine the size by seeking to the end, then go back to the start.
    if (fseek(fp, 0, SEEK_END) != 0) {
        perror("fseek");
        goto done;
    }
    length = ftell(fp);
    if (length < 0) {
        perror("ftell");
        goto done;
    }
    rewind(fp);

    contents = malloc((size_t)length + 1);
    if (contents == NULL) {
        fprintf(stderr, "Out of memory\n");
        goto done;
    }

    if (fread(contents, 1, (size_t)length, fp) != (size_t)length) {
        perror("fread");
        free(contents);
        contents = NULL;
        goto done;
    }
    contents[length] = '\0';

done:
    fclose(fp);
    return contents;
}
int main(int argc, char** argv)
{
if (argc > 1 && strcmp(argv[1], "--test") == 0) {
test_parse();
test_ast_lower();
test_codegen_x86();
return 0;
}
assert(argc > 1);
char* text = read_file(argv[1]);
printf("--- text ---\n");
puts(text);
Expr* expr = parse(text);
free(text);
printf("--- ast ---\n");
expr_print(expr);
IrBlock ir_block;
ir_block_init(&ir_block);
ir_lower_expr(&ir_block, expr);
expr_free(expr);
printf("\n--- ir ---\n");
ir_block_print(&ir_block);
CgBlock cg_block;
cg_block_init(&cg_block);
ir_block_isel_x86(&cg_block, &ir_block);
ir_block_free(&ir_block);
printf("--- isel ---\n");
cg_block_print_vreg(&cg_block);
printf("--- regalloc ---\n");
cg_block_regalloc_x86(&cg_block);
cg_block_print_phys(&cg_block);
JitFn fn = cg_block_emit_x86_machine_code(&cg_block);
cg_block_free(&cg_block);
printf("--- result ---\n");
uint64_t result = fn();
printf("%lu\n", result);
return 0;
}

354
parse.c Normal file
View File

@ -0,0 +1,354 @@
// parser.c
#include "parse.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Kinds of token the lexer produces.
typedef enum {
TOKEN_LPAREN, // "("
TOKEN_RPAREN, // ")"
TOKEN_IDENT, // letter or '_' followed by letters, digits or '_'
TOKEN_INT, // one or more decimal digits
TOKEN_EOF, // end of input
} TokenType;
// A single token. `text` is a heap-allocated copy of the token's characters
// for TOKEN_IDENT and TOKEN_INT and NULL for every other kind.
typedef struct {
TokenType type;
char* text;
} Token;
// Lexer state: the NUL-terminated source text (not owned) and the index of
// the next unread character.
typedef struct {
const char* input;
size_t pos;
} Lexer;
// Parser state: the lexer plus one token of lookahead. The parser owns
// `current.text` and frees it when advancing.
typedef struct {
Lexer lexer;
Token current;
} Parser;
/* =========================
Lexer
========================= */
// Returns the character at the lexer's current position without consuming
// it; this is '\0' once the end of the input has been reached.
static char current_char(Lexer* lexer)
{
    const char* cursor = lexer->input + lexer->pos;
    return *cursor;
}
// Moves the lexer one character forward. At the end of the input this is a
// no-op, so the position never passes the terminating '\0'.
static void advance(Lexer* lexer)
{
    if (current_char(lexer) == '\0') {
        return;
    }
    lexer->pos += 1;
}
// Consumes characters for as long as isspace() accepts them.
static void skip_whitespace(Lexer* lexer)
{
    // <ctype.h> functions require a value representable as unsigned char;
    // passing a negative plain char is undefined behaviour.
    while (isspace((unsigned char)current_char(lexer))) {
        advance(lexer);
    }
}
// Lexes an identifier starting at the current position: the longest run of
// letters, digits and '_'. The returned token owns a heap copy of that text.
// Exits with a message if the copy cannot be allocated.
static Token make_ident(Lexer* lexer)
{
    size_t start = lexer->pos;
    // Cast before calling <ctype.h>: negative plain chars are not allowed.
    while (isalnum((unsigned char)current_char(lexer))
        || current_char(lexer) == '_') {
        advance(lexer);
    }
    char* text = strndup(lexer->input + start, lexer->pos - start);
    if (!text) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    return (Token) {
        .type = TOKEN_IDENT,
        .text = text,
    };
}
// Lexes an integer literal starting at the current position: the longest run
// of decimal digits. The returned token owns a heap copy of that text.
// Exits with a message if the copy cannot be allocated.
static Token make_int(Lexer* lexer)
{
    size_t start = lexer->pos;
    // Cast before calling <ctype.h>: negative plain chars are not allowed.
    while (isdigit((unsigned char)current_char(lexer))) {
        advance(lexer);
    }
    char* text = strndup(lexer->input + start, lexer->pos - start);
    if (!text) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    return (Token) {
        .type = TOKEN_INT,
        .text = text,
    };
}
// Skips whitespace and returns the next token. Parentheses and end of input
// produce tokens without text; identifiers and integers carry a heap copy of
// their characters. Any other character is reported on stderr and terminates
// the program.
static Token next_token(Lexer* lexer)
{
    skip_whitespace(lexer);
    char c = current_char(lexer);
    switch (c) {
        case '\0':
            return (Token) {
                .type = TOKEN_EOF,
                .text = NULL,
            };
        case '(':
            advance(lexer);
            return (Token) {
                .type = TOKEN_LPAREN,
                .text = NULL,
            };
        case ')':
            advance(lexer);
            return (Token) {
                .type = TOKEN_RPAREN,
                .text = NULL,
            };
        default:
            break;
    }
    // <ctype.h> functions must not be given a negative plain char.
    const unsigned char uc = (unsigned char)c;
    if (isalpha(uc) || c == '_') {
        return make_ident(lexer);
    }
    if (isdigit(uc)) {
        return make_int(lexer);
    }
    fprintf(stderr, "Unexpected character: '%c'\n", c);
    exit(EXIT_FAILURE);
}
/* =========================
Parser
========================= */
// Discards the current lookahead token (releasing its text) and replaces it
// with the next token from the lexer.
static void parser_advance(Parser* parser)
{
    char* stale_text = parser->current.text;
    free(stale_text);
    parser->current = next_token(&parser->lexer);
}
// Verifies that the lookahead token has the kind `expected`; otherwise prints
// a message and terminates the program. The token is not consumed.
static void parser_expect(Parser* parser, TokenType expected)
{
    if (parser->current.type == expected) {
        return;
    }
    fprintf(stderr, "Unexpected token\n");
    exit(EXIT_FAILURE);
}
// Creates an atom node (identifier or integer) of the given type. The node
// takes ownership of `text`, which expr_free() later releases.
// Exits with a message if the node cannot be allocated.
static Expr* make_atom_expr(ExprType type, char* text)
{
    Expr* expr = malloc(sizeof *expr);
    if (!expr) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    expr->type = type;
    expr->text = text;
    return expr;
}
// Creates an empty list node. Exits with a message if the node cannot be
// allocated.
static Expr* make_sexpr(void)
{
    Expr* expr = malloc(sizeof *expr);
    if (!expr) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    expr->type = EXPR_SEXPR;
    expr->sexpr.items = NULL;
    expr->sexpr.count = 0;
    return expr;
}
// Appends `item` to the list node `sexpr`, which takes ownership of it.
// The array grows by one slot per call. Exits with a message if it cannot be
// grown.
static void sexpr_push(Expr* sexpr, Expr* item)
{
    // Keep the old pointer until realloc has succeeded; assigning the result
    // directly would lose the array on failure.
    Expr** grown
        = realloc(sexpr->sexpr.items, sizeof(Expr*) * (sexpr->sexpr.count + 1));
    if (!grown) {
        fprintf(stderr, "Out of memory\n");
        exit(EXIT_FAILURE);
    }
    sexpr->sexpr.items = grown;
    sexpr->sexpr.items[sexpr->sexpr.count++] = item;
}
static Expr* parse_expr(Parser* parser);

// Parses "(" expr* ")" and returns the resulting list node. The lookahead
// must be the opening parenthesis on entry; on return it is the token after
// the closing one. A missing ")" ends in parse_expr(), which rejects the
// end-of-input token and terminates the program.
static Expr* parse_list(Parser* parser)
{
    parser_expect(parser, TOKEN_LPAREN);
    parser_advance(parser);

    Expr* list = make_sexpr();
    for (;;) {
        if (parser->current.type == TOKEN_RPAREN) {
            break;
        }
        sexpr_push(list, parse_expr(parser));
    }

    parser_expect(parser, TOKEN_RPAREN);
    parser_advance(parser);
    return list;
}
// Parses one expression: an identifier, an integer literal or a
// parenthesised list. Any other token is reported on stderr and terminates
// the program.
//
// For atoms the token's text is moved into the AST node instead of being
// duplicated and then freed, which saves an allocation and removes a second
// place where running out of memory would go unnoticed.
static Expr* parse_expr(Parser* parser)
{
    switch (parser->current.type) {
        case TOKEN_IDENT:
        case TOKEN_INT: {
            const ExprType type
                = parser->current.type == TOKEN_IDENT ? EXPR_IDENT : EXPR_INT;
            char* text = parser->current.text;
            // Ownership moves to the node; parser_advance() must not free it.
            parser->current.text = NULL;
            if (!text) {
                fprintf(stderr, "Out of memory\n");
                exit(EXIT_FAILURE);
            }
            parser_advance(parser);
            return make_atom_expr(type, text);
        }
        case TOKEN_LPAREN:
            return parse_list(parser);
        default:
            fprintf(stderr, "Unexpected token in expression\n");
            exit(EXIT_FAILURE);
    }
}
/* =========================
Public API
========================= */
// Parses exactly one expression from the NUL-terminated string `source` and
// returns its AST, which the caller releases with expr_free(). Anything left
// after the expression other than whitespace, and any syntax error, prints a
// message to stderr and terminates the program.
Expr* parse(const char* source)
{
    Parser parser = {
        .lexer = { .input = source, .pos = 0 },
        .current = { 0 },
    };

    // Prime the one-token lookahead.
    parser.current = next_token(&parser.lexer);
    Expr* root = parse_expr(&parser);

    if (parser.current.type == TOKEN_EOF) {
        free(parser.current.text);
        return root;
    }

    fprintf(stderr, "Expected EOF\n");
    exit(EXIT_FAILURE);
}
/* =========================
Debug Printing
========================= */
// Prints a list node to stdout as "SExpr [ a, b, ... ]".
static void print_sexpr(Expr* expr)
{
    const size_t total = expr->sexpr.count;

    printf("SExpr [ ");
    for (size_t k = 0; k < total; k++) {
        if (k > 0) {
            printf(", ");
        }
        expr_print(expr->sexpr.items[k]);
    }
    printf(" ]");
}
// Prints the AST rooted at `expr` to stdout without a trailing newline:
// Ident("name"), Int(digits), or SExpr [ ... ] for lists.
void expr_print(Expr* expr)
{
    if (expr->type == EXPR_IDENT) {
        printf("Ident(\"%s\")", expr->text);
    } else if (expr->type == EXPR_INT) {
        printf("Int(%s)", expr->text);
    } else if (expr->type == EXPR_SEXPR) {
        print_sexpr(expr);
    }
}
/* =========================
Memory Cleanup
========================= */
// Releases the AST rooted at `expr`: the text of atoms, the children and
// item array of lists, and the node itself. Like free(), it accepts NULL and
// then does nothing.
void expr_free(Expr* expr)
{
    if (!expr) {
        return;
    }
    switch (expr->type) {
        case EXPR_IDENT:
        case EXPR_INT:
            free(expr->text);
            break;
        case EXPR_SEXPR:
            for (size_t i = 0; i < expr->sexpr.count; i++) {
                expr_free(expr->sexpr.items[i]);
            }
            free(expr->sexpr.items);
            break;
        default:
            break;
    }
    free(expr);
}
/* =========================
Unit Tests
========================= */
static void test_simple_list(void)
{
Expr* expr = parse("(add 2)");
assert(expr->type == EXPR_SEXPR);
assert(expr->sexpr.count == 2);
assert(expr->sexpr.items[0]->type == EXPR_IDENT);
assert(strcmp(expr->sexpr.items[0]->text, "add") == 0);
assert(expr->sexpr.items[1]->type == EXPR_INT);
assert(strcmp(expr->sexpr.items[1]->text, "2") == 0);
expr_free(expr);
}
// "(add 2 (mul 3 4))" must parse to a three-element list whose last element
// is itself the list (mul 3 4).
static void test_nested_list(void)
{
    Expr* root = parse("(add 2 (mul 3 4))");

    assert(root->type == EXPR_SEXPR);
    assert(root->sexpr.count == 3);

    const Expr* inner = root->sexpr.items[2];
    assert(inner->type == EXPR_SEXPR);
    assert(inner->sexpr.count == 3);

    const char* const expected[] = { "mul", "3", "4" };
    for (size_t i = 0; i < 3; i++) {
        assert(strcmp(inner->sexpr.items[i]->text, expected[i]) == 0);
    }

    expr_free(root);
}
void test_parse(void)
{
test_simple_list();
test_nested_list();
}

33
parse.h Normal file
View File

@ -0,0 +1,33 @@
#ifndef PARSE_H
#define PARSE_H
#include <stddef.h>
// Kinds of AST node.
typedef enum {
EXPR_IDENT, // identifier atom; Expr.text holds the name
EXPR_INT, // integer atom; Expr.text holds the digits as written
EXPR_SEXPR, // parenthesised list; Expr.sexpr holds the elements
} ExprType;
// AST node. `type` selects the valid union member. Nodes returned by parse()
// own their text, their item array and all child nodes; expr_free() releases
// them.
typedef struct Expr Expr;
struct Expr {
ExprType type;
union {
char* text; // EXPR_IDENT / EXPR_INT: NUL-terminated source text
struct {
Expr** items; // EXPR_SEXPR: child nodes in source order
size_t count; // number of entries in `items`
} sexpr;
};
};
// Parses exactly one expression from the NUL-terminated `source`. Syntax
// errors are printed to stderr and terminate the program, so the function
// only returns on success. Release the result with expr_free().
Expr* parse(const char* source);
// Releases an AST and everything it owns.
void expr_free(Expr* expr);
// Prints an AST to stdout without a trailing newline.
void expr_print(Expr* expr);
// Runs the unit tests of parse.c (assert-based).
void test_parse(void);
#endif

2
test.lisp Normal file
View File

@ -0,0 +1,2 @@
(add (mul 4 6) (mul (add 3 4) (add 5 6)))