diff --git a/asm/assemble.c b/asm/assemble.c new file mode 100644 index 0000000..69f7dc3 --- /dev/null +++ b/asm/assemble.c @@ -0,0 +1,686 @@ +#include "asm.h" +#include "eval.h" +#include "parse.h" +#include "report.h" +#include +#include +#include +#include + +typedef enum { + // clang-format off + M_err, M_d8, M_d16, M_nop, M_hlt, M_jmp, + M_jmpf, M_jnz, M_cmp, M_mov, M_in, M_out, + M_call, M_callf, M_ret, M_retf, M_lit, M_int, + M_iret, M_or, M_xor, M_and, M_shl, M_rshl, + M_shr, M_rshr, M_add, M_sub, M_rsub, M_mul, + M_imul, M_div, M_idiv, M_rdiv, M_ridiv, M_mod, + M_rmod, M_push, M_pop + // clang-format on +} Mnemonic; + +static const char* mnemonic_str[] = { + // clang-format off + "err", "d8", "d16", "nop", "hlt", "jmp", + "jmpf", "jnz", "cmp", "mov", "in", "out", + "call", "callf", "ret", "retf", "lit", "int", + "iret", "or", "xor", "and", "shl", "rshl", + "shr", "rshr", "add", "sub", "rsub", "mul", + "imul", "div", "idiv", "rdiv", "ridiv", "mod", + "rmod", "push", "pop" + // clang-format on +}; + +uint16_t pline_assemble(OperandEvaluator* evaluator, + uint16_t* object, + const PLine* line, + Reporter* rep) +{ + +#define CHECK_OPERAND(OP) \ + do { \ + if ((OP).ty == EoTy_Err) { \ + return 0; \ + } \ + } while (0) + +#define ASSEMBLE_ONE(RVAL) \ + do { \ + Line l = (RVAL); \ + return assemble_line(object, &l); \ + } while (0) + + size_t mnemonics_amount = sizeof(mnemonic_str) / sizeof(mnemonic_str[0]); + Mnemonic m = M_err; + for (size_t i = 0; i < mnemonics_amount; ++i) { + if (strcmp(mnemonic_str[i], line->op) == 0) { + m = (Mnemonic)i; + break; + } + } + switch (m) { + case M_err: { + REPORTF_ERROR("unrecognized mnemonic '%s'", line->op); + reporter_print_loc(rep, line->loc); + return 0; + } + case M_d8: { + if (line->ops_size > 64) { + reporter_error_with_loc( + rep, "too many operands (max is 64)", line->loc); + return 0; + } + size_t buffer_capacity = 128; + uint8_t* buffer = malloc(sizeof(uint8_t) * buffer_capacity); + size_t buffer_size = 0; + for 
(size_t i = 0; i < line->ops_size; ++i) { + EvaledOperand val = eval_operand(evaluator, line->ops[i]); + CHECK_OPERAND(val); + switch (val.ty) { + case EoTy_Imm: + buffer[buffer_size++] = (uint8_t)val.imm; + break; + case EoTy_Str: { + for (size_t si = 0; si < line->ops[i]->str_len; ++si) { + buffer[buffer_size++] + = (uint8_t)line->ops[i]->str[si]; + } + break; + } + default: + reporter_error_with_loc( + rep, "invalid operand", line->ops[i]->loc); + return 0; + } + } + uint16_t ip_diff = 0; + for (size_t i = 0; i < buffer_size; i += 2) { + uint16_t data = 0; + // XXX: little endian + data |= buffer[i]; + if (i + 1 < buffer_size) { + data |= (uint16_t)((uint16_t)buffer[i] << 8); + } + Line l = s_data_i(data); + ip_diff += assemble_line(object, &l); + } + return ip_diff; + } + case M_d16: { + if (line->ops_size > 32) { + reporter_error_with_loc( + rep, "too many operands (max is 32)", line->loc); + return 0; + } + uint16_t ip_diff = 0; + for (size_t i = 0; i < line->ops_size; ++i) { + EvaledOperand val = eval_operand(evaluator, line->ops[i]); + CHECK_OPERAND(val); + switch (val.ty) { + case EoTy_Imm: { + Line l = s_data_i(val.imm); + ip_diff += assemble_line(object, &l); + break; + } + default: + reporter_error_with_loc( + rep, "invalid operand", line->ops[i]->loc); + return 0; + } + } + return ip_diff; + } + case M_nop: + if (line->ops_size == 0) + ASSEMBLE_ONE(s_nop()); + break; + case M_hlt: + if (line->ops_size == 0) + ASSEMBLE_ONE(s_hlt()); + break; + case M_jmp: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Reg) + ASSEMBLE_ONE(s_jmp_r(op1.reg)); + if (op1.ty == EoTy_Imm) + ASSEMBLE_ONE(s_jmp_i(op1.imm)); + } + break; + case M_jmpf: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == 
EoTy_Reg) + ASSEMBLE_ONE(s_jmpf_r_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_jmpf_r_i(op1.reg, op2.imm)); + } + if (op1.ty == EoTy_Imm) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_jmpf_i_r(op1.imm, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_jmpf_i_i(op1.imm, op2.imm)); + } + } + break; + case M_jnz: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_jnz_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_jnz_i(op1.reg, op2.imm)); + } + } + break; + case M_cmp: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_cmp_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_cmp_i(op1.reg, op2.imm)); + } + } + break; + case M_mov: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mov16_r_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_mov16_r_i(op1.reg, op2.imm)); + if (op2.ty == EoTy_Mem8Reg) + ASSEMBLE_ONE(s_mov8_r_mr(op1.reg, op2.reg, op2.offset)); + if (op2.ty == EoTy_Mem8Imm) + ASSEMBLE_ONE(s_mov8_r_mi(op1.reg, op2.imm)); + if (op2.ty == EoTy_MemU16Reg) + ASSEMBLE_ONE( + s_mov16_r_mr(op1.reg, op2.reg, op2.offset)); + if (op2.ty == EoTy_MemU16Imm) { + ASSEMBLE_ONE(s_mov16_r_mi(op1.reg, op2.imm)); + } + } + if (op1.ty == EoTy_Mem8Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mov8_mr_r(op1.reg, op1.offset, op2.reg)); + if (op2.ty == EoTy_Imm) + 
ASSEMBLE_ONE(s_mov8_mr_i(op1.reg, op1.offset, op2.imm)); + } + if (op1.ty == EoTy_Mem8Imm) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mov8_mi_r(op1.imm, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_mov8_mi_i(op1.imm, op2.imm)); + } + if (op1.ty == EoTy_MemU16Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE( + s_mov16_mr_r(op1.reg, op1.offset, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE( + s_mov16_mr_i(op1.reg, op1.offset, op2.imm)); + } + if (op1.ty == EoTy_MemU16Imm) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mov16_mi_r(op1.imm, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_mov16_mi_i(op1.imm, op2.imm)); + } + } + break; + case M_in: + if (line->ops_size == 2) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + if (dst.ty == EoTy_Reg) { + if (op1.ty == EoTy_Reg) + ASSEMBLE_ONE(s_in_r(dst.reg, op1.reg)); + if (op1.ty == EoTy_Imm) + ASSEMBLE_ONE(s_in_i(dst.reg, op1.imm)); + } + } + break; + case M_out: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_out_r_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_out_r_i(op1.reg, op2.imm)); + } + if (op1.ty == EoTy_Imm) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_out_i_r(op1.imm, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_out_i_i(op1.imm, op2.imm)); + } + } + break; + case M_call: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Reg) + ASSEMBLE_ONE(s_call_r(op1.reg)); + if (op1.ty == EoTy_Imm) + ASSEMBLE_ONE(s_call_i(op1.imm)); + } + break; + case M_callf: + if (line->ops_size == 2) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + 
CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op2); + if (op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_callf_r_r(op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_callf_r_i(op1.reg, op2.imm)); + } + if (op1.ty == EoTy_Imm) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_callf_i_r(op1.imm, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_callf_i_i(op1.imm, op2.imm)); + } + } + break; + case M_ret: + if (line->ops_size == 0) { + ASSEMBLE_ONE(s_ret()); + } + break; + case M_retf: + if (line->ops_size == 0) { + ASSEMBLE_ONE(s_retf()); + } + break; + case M_lit: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Reg) + ASSEMBLE_ONE(s_lit_r(op1.reg)); + if (op1.ty == EoTy_Imm) + ASSEMBLE_ONE(s_lit_i(op1.imm)); + } + break; + case M_int: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Imm) { + if (op1.imm > 0xff) { + reporter_error_with_loc( + rep, "interrupt id exceeds 1 byte", line->loc); + return 0; + } + ASSEMBLE_ONE(s_int((uint8_t)op1.imm)); + } + } + break; + case M_iret: + if (line->ops_size == 0) { + ASSEMBLE_ONE(s_iret()); + } + break; + case M_or: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_or_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) { + ASSEMBLE_ONE(s_or_i(dst.reg, op1.reg, op2.imm)); + } + } + } + break; + case M_xor: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, 
line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_xor_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_xor_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_and: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_and_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_and_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_shl: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_shl_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_shl_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_rshl: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_rshl_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_rshl_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_shr: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + 
CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_shr_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_shr_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_rshr: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_rshr_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_rshr_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_add: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_add_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_add_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_sub: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_sub_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_sub_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_rsub: + if (line->ops_size == 3) 
{ + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_rsub_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_rsub_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_mul: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mul_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_mul_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_imul: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_imul_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_imul_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_div: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_div_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_div_i(dst.reg, op1.reg, 
op2.imm)); + } + } + break; + case M_idiv: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_idiv_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_idiv_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_rdiv: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_rdiv_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_rdiv_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_ridiv: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_ridiv_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_ridiv_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_mod: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_mod_r(dst.reg, op1.reg, 
op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_mod_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_rmod: + if (line->ops_size == 3) { + EvaledOperand dst = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(dst); + EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); + CHECK_OPERAND(op1); + EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); + CHECK_OPERAND(op2); + if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { + if (op2.ty == EoTy_Reg) + ASSEMBLE_ONE(s_rmod_r(dst.reg, op1.reg, op2.reg)); + if (op2.ty == EoTy_Imm) + ASSEMBLE_ONE(s_rmod_i(dst.reg, op1.reg, op2.imm)); + } + } + break; + case M_push: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Reg) { + uint16_t size = 0; + Line l; + l = s_add_i(Rsp, Rsp, 2); + size += assemble_line(object, &l); + l = s_mov16_mr_r(Rsp, 0, op1.reg); + size += assemble_line(object, &l); + return size; + } + if (op1.ty == EoTy_Imm) { + uint16_t size = 0; + Line l; + l = s_add_i(Rsp, Rsp, 2); + size += assemble_line(object, &l); + l = s_mov16_mr_i(Rsp, 0, op1.imm); + size += assemble_line(object, &l); + return size; + } + } + break; + case M_pop: + if (line->ops_size == 1) { + EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); + CHECK_OPERAND(op1); + if (op1.ty == EoTy_Reg) { + uint16_t size = 0; + Line l; + l = s_mov16_r_mr(op1.reg, Rsp, 0); + size += assemble_line(object, &l); + l = s_sub_i(Rsp, Rsp, 2); + size += assemble_line(object, &l); + return size; + } + } + break; + } + reporter_error_with_loc(rep, "malformed instruction", line->loc); + return 0; + +#undef CHECK_OPERAND +#undef ASSEMBLE_ONE +} diff --git a/asm/assemble.h b/asm/assemble.h new file mode 100644 index 0000000..ef7c344 --- /dev/null +++ b/asm/assemble.h @@ -0,0 +1,11 @@ +#pragma once + +#include "eval.h" +#include "parse.h" +#include "report.h" +#include + +uint16_t pline_assemble(OperandEvaluator* evaluator, + uint16_t* object, + const 
PLine* line, + Reporter* rep); diff --git a/asm/eval.c b/asm/eval.c new file mode 100644 index 0000000..b82d336 --- /dev/null +++ b/asm/eval.c @@ -0,0 +1,304 @@ +#include "eval.h" +#include "parse.h" +#include "report.h" +#include "resolve.h" +#include +#include +#include +#include + +static inline uint16_t eval_poperandty_unary(POperandTy ty, uint16_t operand) +{ + switch (ty) { + case PoTy_Not: + return ~operand; + case PoTy_Negate: + return (uint16_t)-(int16_t)operand; + default: + return 0; + } +} + +static inline uint16_t eval_poperandty_binary( + POperandTy ty, uint16_t left, uint16_t right) +{ + switch (ty) { + case PoTy_Or: + return left | right; + case PoTy_Xor: + return left ^ right; + case PoTy_And: + return left & right; + case PoTy_Shl: + return (uint16_t)(left << right); + case PoTy_Shr: + return (uint16_t)(left >> right); + case PoTy_Add: + return (uint16_t)((int16_t)left + (int16_t)right); + case PoTy_Sub: + return (uint16_t)((int16_t)left - (int16_t)right); + case PoTy_Mul: + return (uint16_t)((int16_t)left * (int16_t)right); + case PoTy_Div: + return (uint16_t)((int16_t)left / (int16_t)right); + case PoTy_Mod: + return (uint16_t)((int16_t)left % (int16_t)right); + default: + return 0; + } +} + +EvaledOperand eval_operand_to_imm( + OperandEvaluator* evaluator, POperand* operand) +{ + switch (operand->ty) { + case PoTy_Str: + REPORTF_ERROR("%s", "strings cannot be part of expressions"); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + case PoTy_Mem8: + case PoTy_Mem16: + REPORTF_ERROR("%s", "indirections cannot be part of expressions"); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + case PoTy_Reg: + REPORTF_ERROR("%s", "registers cannot be part of expressions"); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + case PoTy_Imm: + return (EvaledOperand) { .ty = EoTy_Imm, .imm = operand->imm }; + case 
PoTy_Ident: + case PoTy_SubLabel: { + const IdentResol* re + = ident_resolver_resolve(evaluator->re, operand->str); + if (re == NULL) { + if (!evaluator->second_pass) { + return (EvaledOperand) { .ty = EoTy_Imm, .imm = 0 }; + } + REPORTF_ERROR("undefined identifier '%s'", operand->str); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + } + switch (re->ty) { + case IdentResolTy_None: + break; + case IdentResolTy_Label: + case IdentResolTy_SubLabel: + return (EvaledOperand) { .ty = EoTy_Imm, .imm = re->ip }; + } + fprintf(stderr, "unreachable\n"); + exit(1); + } + case PoTy_Not: + case PoTy_Negate: { + EvaledOperand inner + = eval_operand_to_imm(evaluator, operand->operand); + if (inner.ty == EoTy_Err) { + return inner; + } else if (inner.ty != EoTy_Imm) { + REPORTF_ERROR("%s", "operand cannot be used in expressions"); + reporter_print_loc(evaluator->rep, operand->operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + } + return (EvaledOperand) { + .ty = EoTy_Imm, + .imm = eval_poperandty_unary(operand->ty, inner.imm), + }; + } + case PoTy_Or: + case PoTy_Xor: + case PoTy_And: + case PoTy_Shl: + case PoTy_Shr: + case PoTy_Add: + case PoTy_Sub: + case PoTy_Mul: + case PoTy_Div: + case PoTy_Mod: { + EvaledOperand left = eval_operand_to_imm(evaluator, operand->left); + if (left.ty == EoTy_Err) { + return left; + } else if (left.ty != EoTy_Imm) { + REPORTF_ERROR("%s", "operand cannot be used in expressions"); + reporter_print_loc(evaluator->rep, operand->left->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + } + EvaledOperand right + = eval_operand_to_imm(evaluator, operand->right); + if (right.ty == EoTy_Err) { + return right; + } else if (right.ty != EoTy_Imm) { + REPORTF_ERROR("%s", "operand cannot be used in expressions"); + reporter_print_loc(evaluator->rep, operand->right->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + } + return (EvaledOperand) { + .ty = EoTy_Imm, + .imm = 
eval_poperandty_binary(operand->ty, left.imm, right.imm), + }; + } + } + fprintf(stderr, "unreachable\n"); + exit(1); +} + +EvaledOperand eval_operand_indirection_expr( + OperandEvaluator* evaluator, POperand* operand) +{ + switch (operand->ty) { + case PoTy_Reg: + return (EvaledOperand) { + .ty = EoTy_Mem8Reg, + .reg = operand->reg, + .offset = 0, + }; + case PoTy_Str: + REPORTF_ERROR("%s", "strings cannot be part of indirections"); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + case PoTy_Mem8: + case PoTy_Mem16: + REPORTF_ERROR("%s", "indirections cannot be part of indirections"); + reporter_print_loc(evaluator->rep, operand->loc); + return (EvaledOperand) { .ty = EoTy_Err }; + case PoTy_Imm: + case PoTy_Ident: + case PoTy_SubLabel: + case PoTy_Not: + case PoTy_Negate: + case PoTy_Or: + case PoTy_Xor: + case PoTy_And: + case PoTy_Shl: + case PoTy_Shr: + case PoTy_Mul: + case PoTy_Div: + case PoTy_Mod: { + EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); + if (evaled.ty == EoTy_Err) { + return evaled; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .imm = evaled.imm, + }; + } + case PoTy_Add: { + if (operand->left->ty == PoTy_Reg) { + EvaledOperand right + = eval_operand_to_imm(evaluator, operand->right); + if (right.ty == EoTy_Err) { + return right; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .reg = operand->left->reg, + .offset = right.imm, + }; + } else if (operand->right->ty == PoTy_Reg) { + EvaledOperand left + = eval_operand_to_imm(evaluator, operand->left); + if (left.ty == EoTy_Err) { + return left; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .reg = operand->right->reg, + .offset = left.imm, + }; + } else { + EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); + if (evaled.ty == EoTy_Err) { + return evaled; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .imm = evaled.imm, + }; + } + break; + } + case PoTy_Sub: { + if (operand->left->ty == 
PoTy_Reg) { + EvaledOperand right + = eval_operand_to_imm(evaluator, operand->right); + if (right.ty == EoTy_Err) { + return right; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .reg = operand->left->reg, + .offset = (uint16_t)-(int16_t)right.imm, + }; + } else if (operand->right->ty == PoTy_Reg) { + EvaledOperand left + = eval_operand_to_imm(evaluator, operand->left); + if (left.ty == EoTy_Err) { + return left; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .reg = operand->right->reg, + .offset = (uint16_t)-(int16_t)left.imm, + }; + } else { + EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); + if (evaled.ty == EoTy_Err) { + return evaled; + } + return (EvaledOperand) { + .ty = EoTy_Mem8Imm, + .imm = evaled.imm, + }; + } + break; + } + } + fprintf(stderr, "unreachable\n"); + exit(1); +} + +EvaledOperand eval_operand(OperandEvaluator* evaluator, POperand* operand) +{ + switch (operand->ty) { + case PoTy_Str: + return (EvaledOperand) { .ty = EoTy_Str }; + case PoTy_Mem8: + return eval_operand_indirection_expr(evaluator, operand->operand); + case PoTy_Mem16: { + EvaledOperand evaled + = eval_operand_indirection_expr(evaluator, operand->operand); + switch (evaled.ty) { + case EoTy_Mem8Reg: + evaled.ty = EoTy_MemU16Reg; + break; + case EoTy_Mem8Imm: + evaled.ty = EoTy_MemU16Imm; + break; + default: + break; + } + return evaled; + } + case PoTy_Reg: + return (EvaledOperand) { .ty = EoTy_Reg, .reg = operand->reg }; + case PoTy_Imm: + case PoTy_Ident: + case PoTy_SubLabel: + case PoTy_Not: + case PoTy_Negate: + case PoTy_Or: + case PoTy_Xor: + case PoTy_And: + case PoTy_Shl: + case PoTy_Shr: + case PoTy_Add: + case PoTy_Sub: + case PoTy_Mul: + case PoTy_Div: + case PoTy_Mod: + return eval_operand_to_imm(evaluator, operand); + } + fprintf(stderr, "unreachable\n"); + exit(1); +} diff --git a/asm/eval.h b/asm/eval.h new file mode 100644 index 0000000..c7e42ec --- /dev/null +++ b/asm/eval.h @@ -0,0 +1,39 @@ +#pragma once + +#include 
"common/arch.h" +#include "parse.h" +#include "resolve.h" +#include +#include + +typedef enum { + EoTy_Err, + EoTy_Reg, + EoTy_Imm, + EoTy_Str, + EoTy_Mem8Reg, + EoTy_Mem8Imm, + EoTy_MemU16Reg, + EoTy_MemU16Imm, +} EvaledOperandTy; + +typedef struct { + EvaledOperandTy ty; + union { + Reg reg; + uint16_t imm; + }; + uint16_t offset; +} EvaledOperand; + +typedef struct { + IdentResolver* re; + Reporter* rep; + bool second_pass; +} OperandEvaluator; + +EvaledOperand eval_operand_to_imm( + OperandEvaluator* evaluator, POperand* operand); +EvaledOperand eval_operand_indirection_expr( + OperandEvaluator* evaluator, POperand* operand); +EvaledOperand eval_operand(OperandEvaluator* evaluator, POperand* operand); diff --git a/asm/lex.c b/asm/lex.c new file mode 100644 index 0000000..08e4b7c --- /dev/null +++ b/asm/lex.c @@ -0,0 +1,178 @@ +#include "lex.h" +#include "report.h" +#include "str.h" +#include + +void lexer_construct(Lexer* lexer, const char* filename, const char* text) +{ + *lexer = (Lexer) { + .filename = filename, + .text = text, + .text_len = strlen(text), + .idx = 0, + .line = 1, + .col = 1, + .ch = text[0], + .error_occured = false, + }; +} + +static inline bool lexer_done(const Lexer* lexer) +{ + return lexer->idx >= lexer->text_len; +} + +static inline void lexer_step(Lexer* lexer) +{ + if (lexer_done(lexer)) { + return; + } + if (lexer->ch == '\n') { + lexer->line += 1; + lexer->col = 1; + } else { + lexer->col += 1; + } + lexer->idx += 1; + lexer->ch = lexer->text[lexer->idx]; +} + +static inline Loc lexer_loc(const Lexer* lexer) +{ + return (Loc) { .idx = lexer->idx, .line = lexer->line, .col = lexer->col }; +} + +static inline Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc) +{ + return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx }; +} + +static inline int lexer_skip_literal_char(Lexer* lexer) +{ + char ch = lexer->ch; + lexer_step(lexer); + if (ch == '\\') { + if (lexer_done(lexer)) + return -1; + lexer_step(lexer); + } + return 0; 
+} + +static inline void lexer_report(Lexer* lexer, const char* msg, Loc loc) +{ + lexer->error_occured = true; + REPORTF_ERROR("%s", msg); + print_report_loc(lexer->filename, lexer->text, lexer->text_len, loc); +} + +Tok lexer_next(Lexer* lexer) +{ + const char* ident_chars = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; + const char* int_chars = "1234567890"; + const char* hex_chars = "01234567889abcdefABCDEF"; + + Loc loc = lexer_loc(lexer); + if (lexer_done(lexer)) { + return lexer_tok(lexer, TT_Eof, loc); + } + if (lexer->ch == '\n') { + lexer_step(lexer); + return lexer_tok(lexer, '\n', loc); + } else if (str_includes(" \t", lexer->ch)) { + while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) { + lexer_step(lexer); + } + return lexer_next(lexer); + } else if (str_includes(ident_chars, lexer->ch)) { + while (!lexer_done(lexer) + && (str_includes(ident_chars, lexer->ch) + || str_includes(int_chars, lexer->ch))) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Ident, loc); + } else if (str_includes(int_chars, lexer->ch) && lexer->ch != '0') { + while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Int, loc); + } else if (lexer->ch == ';') { + while (!lexer_done(lexer) && lexer->ch != '\n') { + lexer_step(lexer); + } + return lexer_next(lexer); + } else if (lexer->ch == '0') { + lexer_step(lexer); + if (lexer->ch == 'b') { + lexer_step(lexer); + if (lexer_done(lexer) || !str_includes("01", lexer->ch)) { + lexer_report(lexer, "malformed binary literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + while (!lexer_done(lexer) && str_includes("01", lexer->ch)) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Binary, loc); + } else if (lexer->ch == 'x') { + lexer_step(lexer); + if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) { + lexer_report(lexer, "malformed hex literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + while 
(!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Hex, loc); + + } else { + return lexer_tok(lexer, TT_Int, loc); + } + } else if (lexer->ch == '\'') { + lexer_step(lexer); + lexer_skip_literal_char(lexer); + if (lexer_done(lexer) || lexer->ch != '\'') { + lexer_report(lexer, "malformed character literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + lexer_step(lexer); + return lexer_tok(lexer, TT_Char, loc); + } else if (lexer->ch == '"') { + lexer_step(lexer); + while (!lexer_done(lexer) && lexer->ch != '"') { + lexer_skip_literal_char(lexer); + } + if (lexer_done(lexer) || lexer->ch != '"') { + lexer_report(lexer, "malformed string literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + lexer_step(lexer); + return lexer_tok(lexer, TT_Str, loc); + } else if (lexer->ch == '<') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer->ch == '<') { + lexer_step(lexer); + return lexer_tok(lexer, TT_DoubleLt, loc); + } else { + lexer_report(lexer, "expected '<'", loc); + return lexer_tok(lexer, TT_Err, loc); + } + } else if (lexer->ch == '>') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer->ch == '>') { + lexer_step(lexer); + return lexer_tok(lexer, TT_DoubleGt, loc); + } else { + lexer_report(lexer, "expected '>'", loc); + return lexer_tok(lexer, TT_Err, loc); + } + } else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) { + char ch = lexer->ch; + lexer_step(lexer); + return lexer_tok(lexer, (TokTy)ch, loc); + } else { + lexer_report(lexer, "illegal character", loc); + lexer_step(lexer); + return lexer_tok(lexer, TT_Err, loc); + } +} diff --git a/asm/lex.h b/asm/lex.h new file mode 100644 index 0000000..542b404 --- /dev/null +++ b/asm/lex.h @@ -0,0 +1,55 @@ +#pragma once + +#include "report.h" +#include +#include + +typedef enum { + TT_Err, + TT_Eof, + TT_Ident, + TT_Int, + TT_Binary, + TT_Hex, + TT_Char, + TT_Str, + TT_Newline = '\n', + TT_DoubleLt, + TT_DoubleGt, + TT_Pipe = '|', 
+ TT_Hat = '^', + TT_Ampersand = '&', + TT_Plus = '+', + TT_Minus = '-', + TT_Asterisk = '*', + TT_Slash = '/', + TT_Percent = '%', + TT_LParen = '(', + TT_RParen = ')', + TT_LBracket = '[', + TT_RBracket = ']', + TT_Dot = '.', + TT_Comma = ',', + TT_Colon = ':', + TT_Exclamation = '!', +} TokTy; + +typedef struct { + TokTy ty; + Loc loc; + size_t len; +} Tok; + +typedef struct { + const char* filename; + const char* text; + size_t text_len; + size_t idx; + int line; + int col; + char ch; + bool error_occured; +} Lexer; + +void lexer_construct(Lexer* lexer, const char* filename, const char* text); +Tok lexer_next(Lexer* lexer); diff --git a/asm/main.c b/asm/main.c index 04165aa..0c244c1 100644 --- a/asm/main.c +++ b/asm/main.c @@ -1,5 +1,8 @@ -#include "asm/asm.h" -#include "common/arch.h" +#include "assemble.h" +#include "eval.h" +#include "parse.h" +#include "resolve.h" +#include "str.h" #include #include #include @@ -8,1025 +11,6 @@ #include #include -static inline bool str_includes(const char* str, char ch) -{ - for (size_t i = 0; str[i] != '\0'; ++i) { - if (str[i] == ch) { - return true; - } - } - return false; -} - -static inline char* asm_strdup(const char* str) -{ - size_t len = strlen(str); - char* val = calloc(len + 1, sizeof(char)); - strncpy(val, str, len); - return val; -} - -static inline char* asm_strndup(const char* str, size_t len) -{ - char* val = calloc(len + 1, sizeof(char)); - strncpy(val, str, len); - return val; -} - -typedef struct { - size_t idx; - int line; - int col; -} Loc; - -#define REPORTF_ERROR(FMT, ...) \ - (fprintf( \ - stderr, "\x1b[1;91merror\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) -#define REPORTF_INFO(FMT, ...) \ - (fprintf(stderr, "\x1b[1;96minfo\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) -#define REPORTF_WARNING(FMT, ...) 
\ - (fprintf( \ - stderr, "\x1b[1;93mwarning\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) - -static inline void print_report_loc( - const char* filename, const char* text, size_t text_len, Loc loc) -{ - size_t line_start = loc.idx; - while (line_start > 0 && text[line_start] != '\n') { - line_start -= 1; - } - if (text[line_start] == '\n') { - line_start += 1; - } - size_t line_end = loc.idx + 1; - while (line_end < text_len && text[line_end] != '\n') { - line_end += 1; - } - const char* line = &text[line_start]; - int line_len = (int)line_end - (int)line_start; - - fprintf(stderr, - " \x1b[96m--> ./%s:%d:%d\n " - "\x1b[37m|\n\x1b[96m%5d\x1b[37m|%.*s\n " - "|%*c\x1b[1;91m^\x1b[0m\n", - filename, - loc.line, - loc.col, - loc.line, - line_len, - line, - loc.col - 1, - ' '); -} - -typedef struct { - const char* filename; - const char* text; - size_t text_len; -} Reporter; - -void reporter_print_loc(Reporter* rep, Loc loc) -{ - print_report_loc(rep->filename, rep->text, rep->text_len, loc); -} - -void reporter_error_with_loc(Reporter* rep, const char* msg, Loc loc) -{ - REPORTF_ERROR("%s", msg); - reporter_print_loc(rep, loc); -} - -typedef enum { - TT_Err, - TT_Eof, - TT_Ident, - TT_Int, - TT_Binary, - TT_Hex, - TT_Char, - TT_Str, - TT_Newline = '\n', - TT_DoubleLt, - TT_DoubleGt, - TT_Pipe = '|', - TT_Hat = '^', - TT_Ampersand = '&', - TT_Plus = '+', - TT_Minus = '-', - TT_Asterisk = '*', - TT_Slash = '/', - TT_Percent = '%', - TT_LParen = '(', - TT_RParen = ')', - TT_LBracket = '[', - TT_RBracket = ']', - TT_Dot = '.', - TT_Comma = ',', - TT_Colon = ':', - TT_Exclamation = '!', -} TokTy; - -typedef struct { - TokTy ty; - Loc loc; - size_t len; -} Tok; - -typedef struct { - const char* filename; - const char* text; - size_t text_len; - size_t idx; - int line; - int col; - char ch; - bool error_occured; -} Lexer; - -void lexer_construct(Lexer* lexer, const char* filename, const char* text) -{ - *lexer = (Lexer) { - .filename = filename, - .text = text, - .text_len = 
strlen(text), - .idx = 0, - .line = 1, - .col = 1, - .ch = text[0], - .error_occured = false, - }; -} - -static inline bool lexer_done(const Lexer* lexer) -{ - return lexer->idx >= lexer->text_len; -} - -static inline void lexer_step(Lexer* lexer) -{ - if (lexer_done(lexer)) { - return; - } - if (lexer->ch == '\n') { - lexer->line += 1; - lexer->col = 1; - } else { - lexer->col += 1; - } - lexer->idx += 1; - lexer->ch = lexer->text[lexer->idx]; -} - -static inline Loc lexer_loc(const Lexer* lexer) -{ - return (Loc) { .idx = lexer->idx, .line = lexer->line, .col = lexer->col }; -} - -static inline Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc) -{ - return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx }; -} - -static inline int lexer_skip_literal_char(Lexer* lexer) -{ - char ch = lexer->ch; - lexer_step(lexer); - if (ch == '\\') { - if (lexer_done(lexer)) - return -1; - lexer_step(lexer); - } - return 0; -} - -static inline void lexer_report(Lexer* lexer, const char* msg, Loc loc) -{ - lexer->error_occured = true; - REPORTF_ERROR("%s", msg); - print_report_loc(lexer->filename, lexer->text, lexer->text_len, loc); -} - -Tok lexer_next(Lexer* lexer) -{ - const char* ident_chars = "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; - const char* int_chars = "1234567890"; - const char* hex_chars = "01234567889abcdefABCDEF"; - - Loc loc = lexer_loc(lexer); - if (lexer_done(lexer)) { - return lexer_tok(lexer, TT_Eof, loc); - } - if (lexer->ch == '\n') { - lexer_step(lexer); - return lexer_tok(lexer, '\n', loc); - } else if (str_includes(" \t", lexer->ch)) { - while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) { - lexer_step(lexer); - } - return lexer_next(lexer); - } else if (str_includes(ident_chars, lexer->ch)) { - while (!lexer_done(lexer) - && (str_includes(ident_chars, lexer->ch) - || str_includes(int_chars, lexer->ch))) { - lexer_step(lexer); - } - return lexer_tok(lexer, TT_Ident, loc); - } else if (str_includes(int_chars, 
lexer->ch) && lexer->ch != '0') { - while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) { - lexer_step(lexer); - } - return lexer_tok(lexer, TT_Int, loc); - } else if (lexer->ch == ';') { - while (!lexer_done(lexer) && lexer->ch != '\n') { - lexer_step(lexer); - } - return lexer_next(lexer); - } else if (lexer->ch == '0') { - lexer_step(lexer); - if (lexer->ch == 'b') { - lexer_step(lexer); - if (lexer_done(lexer) || !str_includes("01", lexer->ch)) { - lexer_report(lexer, "malformed binary literal", loc); - return lexer_tok(lexer, TT_Err, loc); - } - while (!lexer_done(lexer) && str_includes("01", lexer->ch)) { - lexer_step(lexer); - } - return lexer_tok(lexer, TT_Binary, loc); - } else if (lexer->ch == 'x') { - lexer_step(lexer); - if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) { - lexer_report(lexer, "malformed hex literal", loc); - return lexer_tok(lexer, TT_Err, loc); - } - while (!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) { - lexer_step(lexer); - } - return lexer_tok(lexer, TT_Hex, loc); - - } else { - return lexer_tok(lexer, TT_Int, loc); - } - } else if (lexer->ch == '\'') { - lexer_step(lexer); - lexer_skip_literal_char(lexer); - if (lexer_done(lexer) || lexer->ch != '\'') { - lexer_report(lexer, "malformed character literal", loc); - return lexer_tok(lexer, TT_Err, loc); - } - lexer_step(lexer); - return lexer_tok(lexer, TT_Char, loc); - } else if (lexer->ch == '"') { - lexer_step(lexer); - while (!lexer_done(lexer) && lexer->ch != '"') { - lexer_skip_literal_char(lexer); - } - if (lexer_done(lexer) || lexer->ch != '"') { - lexer_report(lexer, "malformed string literal", loc); - return lexer_tok(lexer, TT_Err, loc); - } - lexer_step(lexer); - return lexer_tok(lexer, TT_Str, loc); - } else if (lexer->ch == '<') { - lexer_step(lexer); - if (!lexer_done(lexer) && lexer->ch == '<') { - lexer_step(lexer); - return lexer_tok(lexer, TT_DoubleLt, loc); - } else { - lexer_report(lexer, "expected '<'", loc); - return 
lexer_tok(lexer, TT_Err, loc); - } - } else if (lexer->ch == '>') { - lexer_step(lexer); - if (!lexer_done(lexer) && lexer->ch == '>') { - lexer_step(lexer); - return lexer_tok(lexer, TT_DoubleGt, loc); - } else { - lexer_report(lexer, "expected '>'", loc); - return lexer_tok(lexer, TT_Err, loc); - } - } else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) { - char ch = lexer->ch; - lexer_step(lexer); - return lexer_tok(lexer, (TokTy)ch, loc); - } else { - lexer_report(lexer, "illegal character", loc); - lexer_step(lexer); - return lexer_tok(lexer, TT_Err, loc); - } -} - -typedef struct PLabel PLabel; - -struct PLabel { - PLabel* next; - char* ident; - Loc loc; - bool sub_label; -}; - -PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc) -{ - PLabel* label = malloc(sizeof(PLabel)); - *label = (PLabel) { next, ident, loc, sub_label }; - return label; -} -void plabel_free(PLabel* label) -{ - if (!label) { - return; - } - plabel_free(label->next); - free(label->ident); - free(label); -} - -typedef enum { - PoTy_Reg, - PoTy_Imm, - PoTy_Ident, - PoTy_SubLabel, - PoTy_Str, - PoTy_Mem8, - PoTy_Mem16, - PoTy_Not, - PoTy_Negate, - PoTy_Or, - PoTy_Xor, - PoTy_And, - PoTy_Shl, - PoTy_Shr, - PoTy_Add, - PoTy_Sub, - PoTy_Mul, - PoTy_Div, - PoTy_Mod, -} POperandTy; - -typedef struct POperand POperand; - -struct POperand { - POperandTy ty; - Loc loc; - union { - Reg reg; - uint16_t imm; - struct { - char* str; - size_t str_len; - }; - POperand* operand; - struct { - POperand* left; - POperand* right; - }; - }; -}; - -POperand* poperand_new_reg(Reg reg, Loc loc) -{ - POperand* operand = malloc(sizeof(POperand)); - *operand = (POperand) { .ty = PoTy_Reg, .loc = loc, .reg = reg }; - return operand; -} -POperand* poperand_new_imm(uint16_t imm, Loc loc) -{ - POperand* operand = malloc(sizeof(POperand)); - *operand = (POperand) { .ty = PoTy_Imm, .loc = loc, .imm = imm }; - return operand; -} -POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc 
loc) -{ - POperand* operand = malloc(sizeof(POperand)); - *operand = (POperand) { - .ty = ty, - .loc = loc, - .str = str, - .str_len = str_len, - }; - return operand; -} -POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc) -{ - POperand* operand = malloc(sizeof(POperand)); - *operand = (POperand) { .ty = ty, .loc = loc, .operand = inner }; - return operand; -} -POperand* poperand_new_binary( - POperandTy ty, POperand* left, POperand* right, Loc loc) -{ - POperand* operand = malloc(sizeof(POperand)); - *operand - = (POperand) { .ty = ty, .loc = loc, .left = left, .right = right }; - return operand; -} -void poperand_free(POperand* operand) -{ - switch (operand->ty) { - case PoTy_Reg: - case PoTy_Imm: - break; - case PoTy_Ident: - case PoTy_SubLabel: - case PoTy_Str: - free(operand->str); - break; - case PoTy_Mem8: - case PoTy_Mem16: - case PoTy_Not: - case PoTy_Negate: - poperand_free(operand->operand); - break; - case PoTy_Or: - case PoTy_Xor: - case PoTy_And: - case PoTy_Shl: - case PoTy_Shr: - case PoTy_Add: - case PoTy_Sub: - case PoTy_Mul: - case PoTy_Div: - case PoTy_Mod: - poperand_free(operand->left); - poperand_free(operand->right); - break; - } - free(operand); -} - -typedef struct { - PLabel* labels; - char* op; - Loc loc; - size_t ops_size; - POperand* ops[]; -} PLine; - -PLine* pline_new( - char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops) -{ - PLine* line = malloc(sizeof(PLine) + sizeof(POperand*) * ops_size); - *line = (PLine) { - .labels = labels, - .op = op, - .loc = loc, - .ops_size = ops_size, - }; - for (size_t i = 0; i < ops_size; ++i) { - line->ops[i] = ops[i]; - } - return line; -} -void pline_free(PLine* pline) -{ - plabel_free(pline->labels); - free(pline->op); - for (size_t i = 0; i < pline->ops_size; ++i) { - poperand_free(pline->ops[i]); - } - free(pline); -} - -typedef enum { - PStmtTy_Line, - PStmtTy_Global, - PStmtTy_Extern, - PStmtTy_Define, -} PStmtTy; - -typedef struct { - PStmtTy ty; - union { - 
PLine* line; - char* ident; - }; -} PStmt; - -void pstmt_free(PStmt* stmt) -{ - switch (stmt->ty) { - case PStmtTy_Line: - pline_free(stmt->line); - break; - case PStmtTy_Global: - case PStmtTy_Extern: - case PStmtTy_Define: - free(stmt->ident); - break; - } - free(stmt); -} - -typedef struct { - Lexer lexer; - Tok tok; - Tok eaten; - bool error_occured; -} Parser; - -void parser_construct(Parser* parser, const char* filename, const char* text) -{ - Lexer lexer; - lexer_construct(&lexer, filename, text); - - *parser = (Parser) { - .lexer = lexer, - .tok = lexer_next(&lexer), - .eaten = (Tok) { 0 }, - .error_occured = false, - }; -} - -bool parser_done(const Parser* parser) -{ - return parser->tok.ty == TT_Eof; -} -bool parser_error_occured(const Parser* parser) -{ - return parser->error_occured || parser->lexer.error_occured; -} - -static inline void parser_step(Parser* parser) -{ - parser->tok = lexer_next(&parser->lexer); -} -static inline bool parser_test(const Parser* parser, TokTy ty) -{ - return parser->tok.ty == ty; -} -static inline bool parser_eat(Parser* parser, TokTy ty) -{ - if (parser_test(parser, ty)) { - parser->eaten = parser->tok; - parser_step(parser); - return true; - } - return false; -} -static inline char* parser_ident_val(const Parser* parser, Tok tok) -{ - return asm_strndup(&parser->lexer.text[tok.loc.idx], tok.len); -} -static inline void parser_report(Parser* parser, const char* msg, Loc loc) -{ - parser->error_occured = true; - REPORTF_ERROR("%s", msg); - print_report_loc(parser->lexer.filename, - parser->lexer.text, - parser->lexer.text_len, - loc); -} - -static inline void parser_skip_newlines(Parser* parser) -{ - while (parser_eat(parser, '\n')) { } -} - -static inline PLabel* parser_parse_labels( - Parser* parser, char** ident, Loc* ident_loc) -{ - *ident = NULL; - PLabel* labels = NULL; - while (parser->tok.ty != TT_Eof && *ident == NULL) { - parser_skip_newlines(parser); - Loc loc = parser->tok.loc; - if (parser_eat(parser, '.')) { 
- if (!parser_eat(parser, TT_Ident)) { - parser_report(parser, "expected identifier", parser->tok.loc); - plabel_free(labels); - return NULL; - } - char* label_ident = parser_ident_val(parser, parser->eaten); - if (!parser_eat(parser, ':')) { - parser_report(parser, "expected ':'", parser->tok.loc); - plabel_free(labels); - free(label_ident); - return NULL; - } - labels = plabel_new(labels, label_ident, true, loc); - } else if (parser_eat(parser, TT_Ident)) { - *ident = parser_ident_val(parser, parser->eaten); - *ident_loc = loc; - if (!parser_eat(parser, ':')) { - break; - } - labels = plabel_new(labels, *ident, false, loc); - *ident = NULL; - } else { - parser_report( - parser, "expected identifier or ':'", parser->tok.loc); - plabel_free(labels); - return NULL; - } - } - return labels; -} - -static inline char literal_char_val(const char* str) -{ - if (str[0] == '\\') { - switch (str[1]) { - case '0': - return 0; - case 't': - return '\t'; - case 'n': - return '\n'; - default: - return str[1]; - } - } else { - return str[0]; - } -} - -static const int parser_binary_prec = 6; -static inline POperand* parser_parse_operand_2(Parser* parser, int prec); - -static inline POperand* parser_parse_operand_0(Parser* parser) -{ - Loc loc = parser->tok.loc; - if (parser_eat(parser, TT_Ident)) { - char* ident = parser_ident_val(parser, parser->eaten); - const char* reg_key[10] = { - "r0", "r1", "r2", "r3", "r4", "rbp", "rsp", "rfl", "rcs", "rip" - }; - Reg reg_val[10] = { R0, R1, R2, R3, R4, Rbp, Rsp, Rfl, Rcs, Rip }; - for (size_t i = 0; i < 10; ++i) { - if (strcmp(reg_key[i], ident) == 0) { - free(ident); - return poperand_new_reg(reg_val[i], loc); - } - } - return poperand_new_str(PoTy_Ident, ident, parser->eaten.len, loc); - } else if (parser_eat(parser, TT_Int)) { - char* str = parser_ident_val(parser, parser->eaten); - uint64_t val = strtoull(str, NULL, 10); - free(str); - if (val > 0xffff) { - parser_report(parser, - "integers larger than 65536 not supported", - 
parser->tok.loc); - return NULL; - } - uint16_t imm = (uint16_t)val; - return poperand_new_imm(imm, loc); - } else if (parser_eat(parser, TT_Binary)) { - char* str = parser_ident_val(parser, parser->eaten); - uint64_t val = strtoull(&str[2], NULL, 2); - free(str); - if (val > 0xffff) { - parser_report(parser, - "integers larger than 65536 not supported", - parser->tok.loc); - return NULL; - } - uint16_t imm = (uint16_t)val; - return poperand_new_imm(imm, loc); - } else if (parser_eat(parser, TT_Hex)) { - char* str = parser_ident_val(parser, parser->eaten); - uint64_t val = strtoull(&str[2], NULL, 16); - free(str); - if (val > 0xffff) { - parser_report(parser, - "integers larger than 65536 not supported", - parser->tok.loc); - return NULL; - } - uint16_t imm = (uint16_t)val; - return poperand_new_imm(imm, loc); - } else if (parser_eat(parser, TT_Char)) { - char* str = parser_ident_val(parser, parser->eaten); - uint16_t imm = (uint16_t)literal_char_val(&str[1]); - free(str); - return poperand_new_imm(imm, loc); - } else if (parser_eat(parser, TT_Str)) { - char* lit = parser_ident_val(parser, parser->eaten); - size_t lit_len = strlen(lit); - char* str = calloc(lit_len - 1, sizeof(char)); - size_t str_len = 0; - for (size_t i = 1; i < lit_len - 2; ++i) { - str[i] = literal_char_val(&lit[i]); - } - free(lit); - return poperand_new_str(PoTy_Str, str, str_len, loc); - } else if (parser_eat(parser, '.')) { - if (!parser_eat(parser, TT_Ident)) { - parser_report(parser, "expected identifier", parser->tok.loc); - return NULL; - } - char* ident = parser_ident_val(parser, parser->eaten); - return poperand_new_str(PoTy_SubLabel, ident, parser->eaten.len, loc); - } else if (parser_eat(parser, '(')) { - POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); - if (!parser_eat(parser, ')')) { - parser_report(parser, "expected ')'", parser->tok.loc); - poperand_free(operand); - return NULL; - } - return operand; - } else { - parser_report(parser, "expected operand", 
parser->tok.loc); - return NULL; - } -} - -static inline POperand* parser_parse_operand_1(Parser* parser) -{ - - Loc loc = parser->tok.loc; - if (parser_eat(parser, '-')) { - POperand* operand = parser_parse_operand_1(parser); - return poperand_new_unary(PoTy_Negate, operand, loc); - } else if (parser_eat(parser, '!')) { - POperand* operand = parser_parse_operand_1(parser); - return poperand_new_unary(PoTy_Not, operand, loc); - } else { - return parser_parse_operand_0(parser); - } -} - -static inline POperand* parser_parse_operand_2(Parser* parser, int prec) -{ - const POperandTy op_tys[] = { - PoTy_Or, - PoTy_Xor, - PoTy_And, - PoTy_Shr, - PoTy_Shl, - PoTy_Add, - PoTy_Sub, - PoTy_Mul, - PoTy_Div, - PoTy_Mod, - }; - const TokTy op_tts[] = { - '|', - '^', - '&', - TT_DoubleGt, - TT_DoubleLt, - '+', - '-', - '*', - '/', - '%', - }; - const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 }; - static_assert(sizeof(op_tys) / sizeof(op_tys[0]) - == sizeof(op_tts) / sizeof(op_tts[0]), - "misaligned"); - static_assert(sizeof(op_tys) / sizeof(op_tys[0]) - == sizeof(op_precs) / sizeof(op_precs[0]), - "misaligned"); - - if (prec == 0) { - return parser_parse_operand_1(parser); - } - POperand* left = parser_parse_operand_2(parser, prec - 1); - bool should_continue = true; - while (should_continue) { - should_continue = false; - for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) { - if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) { - POperand* right = parser_parse_operand_2(parser, prec - 1); - left = poperand_new_binary(op_tys[i], left, right, left->loc); - should_continue = true; - break; - } - } - } - return left; -} - -static inline POperand* parser_parse_operand_3(Parser* parser) -{ - Loc loc = parser->tok.loc; - if (parser_eat(parser, TT_LBracket)) { - parser_report(parser, "expected 'u8' or 'u16' before '['", loc); - return NULL; - } - if (!parser_test(parser, TT_Ident)) { - return parser_parse_operand_2(parser, parser_binary_prec); - } - char* 
ident = parser_ident_val(parser, parser->tok); - if (strcmp(ident, "u8") == 0) { - free(ident); - parser_step(parser); - if (!parser_eat(parser, '[')) { - parser_report(parser, "expected '['", parser->tok.loc); - return NULL; - } - POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); - if (!parser_eat(parser, ']')) { - parser_report(parser, "expected ']'", parser->tok.loc); - poperand_free(operand); - return NULL; - } - return poperand_new_unary(PoTy_Mem8, operand, loc); - } else if (strcmp(ident, "u16") == 0) { - free(ident); - parser_step(parser); - if (!parser_eat(parser, '[')) { - parser_report(parser, "expected '['", parser->tok.loc); - return NULL; - } - POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); - if (!parser_eat(parser, ']')) { - parser_report(parser, "expected ']'", parser->tok.loc); - poperand_free(operand); - return NULL; - } - return poperand_new_unary(PoTy_Mem16, operand, loc); - } else { - free(ident); - return parser_parse_operand_2(parser, parser_binary_prec); - } -} - -static inline void parser_skip_to_next_line(Parser* parser) -{ - while (!parser_done(parser) && !parser_eat(parser, TT_Newline)) { - parser_step(parser); - } -} - -PLine* parser_next(Parser* parser) -{ - char* ident; - Loc loc; - PLabel* labels = parser_parse_labels(parser, &ident, &loc); - - const size_t max_ops_size = 64; - // TODO: Move allocation out-of-band. 
- POperand** ops = malloc(sizeof(POperand) * max_ops_size); - size_t ops_size = 0; - - if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) { - POperand* operand = parser_parse_operand_3(parser); - if (!operand) { - parser_skip_to_next_line(parser); - goto error_free_ops; - } - ops[ops_size++] = operand; - while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n') - && ops_size < 3) { - if (ops_size >= max_ops_size) { - parser_report(parser, - "exceeded maximum number of operands (64)", - parser->tok.loc); - parser_skip_to_next_line(parser); - goto error_free_ops; - } - if (!parser_eat(parser, ',')) { - parser_report(parser, "expected ','", parser->tok.loc); - parser_skip_to_next_line(parser); - goto error_free_ops; - } - POperand* operand = parser_parse_operand_3(parser); - if (!operand) { - parser_skip_to_next_line(parser); - goto error_free_ops; - } - ops[ops_size++] = operand; - } - } - if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) { - parser_report(parser, "expected newline", parser->tok.loc); - goto error_free_ops; - } - parser_skip_newlines(parser); - - PLine* line = pline_new(ident, labels, loc, ops_size, ops); - free(ops); - return line; - -error_free_ops: - for (size_t i = 0; i < ops_size; ++i) - if (ops[i]) - poperand_free(ops[i]); - free(ops); - plabel_free(labels); - free(ident); - return NULL; -} - -typedef enum { - IdentResolTy_None, - IdentResolTy_Label, - IdentResolTy_SubLabel, -} IdentResolTy; - -typedef struct IdentResol IdentResol; -struct IdentResol { - char* ident; - Loc loc; - const IdentResol* parent; - IdentResolTy ty; - union { - uint16_t ip; - }; -}; - -void ident_resol_destroy(IdentResol* resol) -{ - switch (resol->ty) { - case IdentResolTy_None: - break; - case IdentResolTy_Label: - case IdentResolTy_SubLabel: - free(resol->ident); - break; - } -} - -typedef struct IdentResolver IdentResolver; - -struct IdentResolver { - IdentResol* resols; - size_t resols_capacity; - size_t resols_size; - const 
IdentResol* current_parent; -}; - -void ident_resolver_construct(IdentResolver* resolver) -{ - size_t capacity = 512; - *resolver = (IdentResolver) { - .resols = malloc(sizeof(IdentResol) * capacity), - .resols_capacity = capacity, - .resols_size = 0, - }; -} - -void ident_resolver_destroy(IdentResolver* resolver) -{ - for (size_t i = 0; i < resolver->resols_size; ++i) { - ident_resol_destroy(&resolver->resols[i]); - } - free(resolver->resols); -} - -static inline size_t ident_resolver_first_empty(IdentResolver* resolver) -{ - size_t i = 0; - for (; i < resolver->resols_size; ++i) { - if (resolver->resols[i].ty == IdentResolTy_None) { - break; - } - } - if (i >= resolver->resols_size) { - if (resolver->resols_size + 1 > resolver->resols_capacity) { - resolver->resols_capacity *= 2; - resolver->resols = realloc(resolver->resols, - sizeof(IdentResol) * resolver->resols_capacity); - } - resolver->resols_size += 1; - } - return i; -} - -void ident_resolver_define_label( - IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip) -{ - size_t i = ident_resolver_first_empty(resolver); - resolver->resols[i] = (IdentResol) { - .ident = ident, - .loc = loc, - .ty = IdentResolTy_Label, - .ip = asm_ip * 2, - }; - resolver->current_parent = &resolver->resols[i]; -} - -void ident_resolver_define_sublabel( - IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip) -{ - size_t i = ident_resolver_first_empty(resolver); - resolver->resols[i] = (IdentResol) { - .ident = ident, - .loc = loc, - .parent = resolver->current_parent, - .ty = IdentResolTy_SubLabel, - .ip = asm_ip * 2, - }; -} - -const IdentResol* ident_resolver_resolve( - const IdentResolver* resolver, const char* ident) -{ - for (size_t i = resolver->resols_size; i > 0; --i) { - IdentResol* re = &resolver->resols[i - 1]; - if (re->ty != IdentResolTy_None && strcmp(re->ident, ident) == 0 - && (re->ty != IdentResolTy_SubLabel - || re->parent == resolver->current_parent)) { - return re; - } - } - return NULL; -} 
- static inline int define_labels( IdentResolver* resolver, PLabel* label, uint16_t asm_ip, Reporter* rep) { @@ -1062,1006 +46,6 @@ static inline void use_labels(IdentResolver* resolver, PLabel* label) } } -typedef enum { - EoTy_Err, - EoTy_Reg, - EoTy_Imm, - EoTy_Str, - EoTy_Mem8Reg, - EoTy_Mem8Imm, - EoTy_MemU16Reg, - EoTy_MemU16Imm, -} EvaledOperandTy; - -typedef struct { - EvaledOperandTy ty; - union { - Reg reg; - uint16_t imm; - }; - uint16_t offset; -} EvaledOperand; - -typedef struct { - IdentResolver* re; - Reporter* rep; - bool second_pass; -} OperandEvaluator; - -static inline uint16_t eval_poperandty_unary(POperandTy ty, uint16_t operand) -{ - switch (ty) { - case PoTy_Not: - return ~operand; - case PoTy_Negate: - return (uint16_t)-(int16_t)operand; - default: - return 0; - } -} - -static inline uint16_t eval_poperandty_binary( - POperandTy ty, uint16_t left, uint16_t right) -{ - switch (ty) { - case PoTy_Or: - return left | right; - case PoTy_Xor: - return left ^ right; - case PoTy_And: - return left & right; - case PoTy_Shl: - return (uint16_t)(left << right); - case PoTy_Shr: - return (uint16_t)(left >> right); - case PoTy_Add: - return (uint16_t)((int16_t)left + (int16_t)right); - case PoTy_Sub: - return (uint16_t)((int16_t)left - (int16_t)right); - case PoTy_Mul: - return (uint16_t)((int16_t)left * (int16_t)right); - case PoTy_Div: - return (uint16_t)((int16_t)left / (int16_t)right); - case PoTy_Mod: - return (uint16_t)((int16_t)left % (int16_t)right); - default: - return 0; - } -} - -EvaledOperand eval_operand_to_imm( - OperandEvaluator* evaluator, POperand* operand) -{ - switch (operand->ty) { - case PoTy_Str: - REPORTF_ERROR("%s", "strings cannot be part of expressions"); - reporter_print_loc(evaluator->rep, operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - case PoTy_Mem8: - case PoTy_Mem16: - REPORTF_ERROR("%s", "indirections cannot be part of expressions"); - reporter_print_loc(evaluator->rep, operand->loc); - return 
(EvaledOperand) { .ty = EoTy_Err }; - case PoTy_Reg: - REPORTF_ERROR("%s", "registers cannot be part of expressions"); - reporter_print_loc(evaluator->rep, operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - case PoTy_Imm: - return (EvaledOperand) { .ty = EoTy_Imm, .imm = operand->imm }; - case PoTy_Ident: - case PoTy_SubLabel: { - const IdentResol* re - = ident_resolver_resolve(evaluator->re, operand->str); - if (re == NULL) { - if (!evaluator->second_pass) { - return (EvaledOperand) { .ty = EoTy_Imm, .imm = 0 }; - } - REPORTF_ERROR("undefined identifier '%s'", operand->str); - reporter_print_loc(evaluator->rep, operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - } - switch (re->ty) { - case IdentResolTy_None: - break; - case IdentResolTy_Label: - case IdentResolTy_SubLabel: - return (EvaledOperand) { .ty = EoTy_Imm, .imm = re->ip }; - } - fprintf(stderr, "unreachable\n"); - exit(1); - } - case PoTy_Not: - case PoTy_Negate: { - EvaledOperand inner - = eval_operand_to_imm(evaluator, operand->operand); - if (inner.ty == EoTy_Err) { - return inner; - } else if (inner.ty != EoTy_Imm) { - REPORTF_ERROR("%s", "operand cannot be used in expressions"); - reporter_print_loc(evaluator->rep, operand->operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - } - return (EvaledOperand) { - .ty = EoTy_Imm, - .imm = eval_poperandty_unary(operand->ty, inner.imm), - }; - } - case PoTy_Or: - case PoTy_Xor: - case PoTy_And: - case PoTy_Shl: - case PoTy_Shr: - case PoTy_Add: - case PoTy_Sub: - case PoTy_Mul: - case PoTy_Div: - case PoTy_Mod: { - EvaledOperand left = eval_operand_to_imm(evaluator, operand->left); - if (left.ty == EoTy_Err) { - return left; - } else if (left.ty != EoTy_Imm) { - REPORTF_ERROR("%s", "operand cannot be used in expressions"); - reporter_print_loc(evaluator->rep, operand->left->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - } - EvaledOperand right - = eval_operand_to_imm(evaluator, operand->right); - if (right.ty == EoTy_Err) { 
- return right; - } else if (right.ty != EoTy_Imm) { - REPORTF_ERROR("%s", "operand cannot be used in expressions"); - reporter_print_loc(evaluator->rep, operand->right->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - } - return (EvaledOperand) { - .ty = EoTy_Imm, - .imm = eval_poperandty_binary(operand->ty, left.imm, right.imm), - }; - } - } - fprintf(stderr, "unreachable\n"); - exit(1); -} - -EvaledOperand eval_operand_indirection_expr( - OperandEvaluator* evaluator, POperand* operand) -{ - switch (operand->ty) { - case PoTy_Reg: - return (EvaledOperand) { - .ty = EoTy_Mem8Reg, - .reg = operand->reg, - .offset = 0, - }; - case PoTy_Str: - REPORTF_ERROR("%s", "strings cannot be part of indirections"); - reporter_print_loc(evaluator->rep, operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - case PoTy_Mem8: - case PoTy_Mem16: - REPORTF_ERROR("%s", "indirections cannot be part of indirections"); - reporter_print_loc(evaluator->rep, operand->loc); - return (EvaledOperand) { .ty = EoTy_Err }; - case PoTy_Imm: - case PoTy_Ident: - case PoTy_SubLabel: - case PoTy_Not: - case PoTy_Negate: - case PoTy_Or: - case PoTy_Xor: - case PoTy_And: - case PoTy_Shl: - case PoTy_Shr: - case PoTy_Mul: - case PoTy_Div: - case PoTy_Mod: { - EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); - if (evaled.ty == EoTy_Err) { - return evaled; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .imm = evaled.imm, - }; - } - case PoTy_Add: { - if (operand->left->ty == PoTy_Reg) { - EvaledOperand right - = eval_operand_to_imm(evaluator, operand->right); - if (right.ty == EoTy_Err) { - return right; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .reg = operand->left->reg, - .offset = right.imm, - }; - } else if (operand->right->ty == PoTy_Reg) { - EvaledOperand left - = eval_operand_to_imm(evaluator, operand->left); - if (left.ty == EoTy_Err) { - return left; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .reg = operand->right->reg, - .offset = 
left.imm, - }; - } else { - EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); - if (evaled.ty == EoTy_Err) { - return evaled; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .imm = evaled.imm, - }; - } - break; - } - case PoTy_Sub: { - if (operand->left->ty == PoTy_Reg) { - EvaledOperand right - = eval_operand_to_imm(evaluator, operand->right); - if (right.ty == EoTy_Err) { - return right; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .reg = operand->left->reg, - .offset = (uint16_t)-(int16_t)right.imm, - }; - } else if (operand->right->ty == PoTy_Reg) { - EvaledOperand left - = eval_operand_to_imm(evaluator, operand->left); - if (left.ty == EoTy_Err) { - return left; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .reg = operand->right->reg, - .offset = (uint16_t)-(int16_t)left.imm, - }; - } else { - EvaledOperand evaled = eval_operand_to_imm(evaluator, operand); - if (evaled.ty == EoTy_Err) { - return evaled; - } - return (EvaledOperand) { - .ty = EoTy_Mem8Imm, - .imm = evaled.imm, - }; - } - break; - } - } - fprintf(stderr, "unreachable\n"); - exit(1); -} - -EvaledOperand eval_operand(OperandEvaluator* evaluator, POperand* operand) -{ - switch (operand->ty) { - case PoTy_Str: - return (EvaledOperand) { .ty = EoTy_Str }; - case PoTy_Mem8: - return eval_operand_indirection_expr(evaluator, operand->operand); - case PoTy_Mem16: { - EvaledOperand evaled - = eval_operand_indirection_expr(evaluator, operand->operand); - switch (evaled.ty) { - case EoTy_Mem8Reg: - evaled.ty = EoTy_MemU16Reg; - break; - case EoTy_Mem8Imm: - evaled.ty = EoTy_MemU16Imm; - break; - default: - break; - } - return evaled; - } - case PoTy_Reg: - return (EvaledOperand) { .ty = EoTy_Reg, .reg = operand->reg }; - case PoTy_Imm: - case PoTy_Ident: - case PoTy_SubLabel: - case PoTy_Not: - case PoTy_Negate: - case PoTy_Or: - case PoTy_Xor: - case PoTy_And: - case PoTy_Shl: - case PoTy_Shr: - case PoTy_Add: - case PoTy_Sub: - case PoTy_Mul: - case PoTy_Div: - case 
PoTy_Mod: - return eval_operand_to_imm(evaluator, operand); - } - fprintf(stderr, "unreachable\n"); - exit(1); -} - -typedef enum { - // clang-format off - M_err, M_d8, M_d16, M_nop, M_hlt, M_jmp, - M_jmpf, M_jnz, M_cmp, M_mov, M_in, M_out, - M_call, M_callf, M_ret, M_retf, M_lit, M_int, - M_iret, M_or, M_xor, M_and, M_shl, M_rshl, - M_shr, M_rshr, M_add, M_sub, M_rsub, M_mul, - M_imul, M_div, M_idiv, M_rdiv, M_ridiv, M_mod, - M_rmod, M_push, M_pop - // clang-format on -} Mnemonic; - -const char* mnemonic_str[] = { - // clang-format off - "err", "d8", "d16", "nop", "hlt", "jmp", - "jmpf", "jnz", "cmp", "mov", "in", "out", - "call", "callf", "ret", "retf", "lit", "int", - "iret", "or", "xor", "and", "shl", "rshl", - "shr", "rshr", "add", "sub", "rsub", "mul", - "imul", "div", "idiv", "rdiv", "ridiv", "mod", - "rmod", "push", "pop" - // clang-format on -}; - -static inline uint16_t pline_assemble(OperandEvaluator* evaluator, - uint16_t* object, - const PLine* line, - Reporter* rep) -{ - -#define CHECK_OPERAND(OP) \ - do { \ - if ((OP).ty == EoTy_Err) { \ - return 0; \ - } \ - } while (0) - -#define ASSEMBLE_ONE(RVAL) \ - do { \ - Line l = (RVAL); \ - return assemble_line(object, &l); \ - } while (0) - - size_t mnemonics_amount = sizeof(mnemonic_str) / sizeof(mnemonic_str[0]); - Mnemonic m = M_err; - for (size_t i = 0; i < mnemonics_amount; ++i) { - if (strcmp(mnemonic_str[i], line->op) == 0) { - m = (Mnemonic)i; - break; - } - } - switch (m) { - case M_err: { - REPORTF_ERROR("unrecognized mnemonic '%s'", line->op); - reporter_print_loc(rep, line->loc); - return 0; - } - case M_d8: { - if (line->ops_size > 64) { - reporter_error_with_loc( - rep, "too many operands (max is 64)", line->loc); - return 0; - } - size_t buffer_capacity = 128; - uint8_t* buffer = malloc(sizeof(uint8_t) * buffer_capacity); - size_t buffer_size = 0; - for (size_t i = 0; i < line->ops_size; ++i) { - EvaledOperand val = eval_operand(evaluator, line->ops[i]); - CHECK_OPERAND(val); - switch 
(val.ty) { - case EoTy_Imm: - buffer[buffer_size++] = (uint8_t)val.imm; - break; - case EoTy_Str: { - for (size_t si = 0; si < line->ops[i]->str_len; ++si) { - buffer[buffer_size++] - = (uint8_t)line->ops[i]->str[si]; - } - break; - } - default: - reporter_error_with_loc( - rep, "invalid operand", line->ops[i]->loc); - return 0; - } - } - uint16_t ip_diff = 0; - for (size_t i = 0; i < buffer_size; i += 2) { - uint16_t data = 0; - // XXX: little endian - data |= buffer[i]; - if (i + 1 < buffer_size) { - data |= (uint16_t)((uint16_t)buffer[i] << 8); - } - Line l = s_data_i(data); - ip_diff += assemble_line(object, &l); - } - return ip_diff; - } - case M_d16: { - if (line->ops_size > 32) { - reporter_error_with_loc( - rep, "too many operands (max is 32)", line->loc); - return 0; - } - uint16_t ip_diff = 0; - for (size_t i = 0; i < line->ops_size; ++i) { - EvaledOperand val = eval_operand(evaluator, line->ops[i]); - CHECK_OPERAND(val); - switch (val.ty) { - case EoTy_Imm: { - Line l = s_data_i(val.imm); - ip_diff += assemble_line(object, &l); - break; - } - default: - reporter_error_with_loc( - rep, "invalid operand", line->ops[i]->loc); - return 0; - } - } - return ip_diff; - } - case M_nop: - if (line->ops_size == 0) - ASSEMBLE_ONE(s_nop()); - break; - case M_hlt: - if (line->ops_size == 0) - ASSEMBLE_ONE(s_hlt()); - break; - case M_jmp: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Reg) - ASSEMBLE_ONE(s_jmp_r(op1.reg)); - if (op1.ty == EoTy_Imm) - ASSEMBLE_ONE(s_jmp_i(op1.imm)); - } - break; - case M_jmpf: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_jmpf_r_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_jmpf_r_i(op1.reg, op2.imm)); - } - if 
(op1.ty == EoTy_Imm) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_jmpf_i_r(op1.imm, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_jmpf_i_i(op1.imm, op2.imm)); - } - } - break; - case M_jnz: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_jnz_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_jnz_i(op1.reg, op2.imm)); - } - } - break; - case M_cmp: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_cmp_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_cmp_i(op1.reg, op2.imm)); - } - } - break; - case M_mov: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_mov16_r_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mov16_r_i(op1.reg, op2.imm)); - if (op2.ty == EoTy_Mem8Reg) - ASSEMBLE_ONE(s_mov8_r_mr(op1.reg, op2.reg, op2.offset)); - if (op2.ty == EoTy_Mem8Imm) - ASSEMBLE_ONE(s_mov8_r_mi(op1.reg, op2.imm)); - if (op2.ty == EoTy_MemU16Reg) - ASSEMBLE_ONE( - s_mov16_r_mr(op1.reg, op2.reg, op2.offset)); - if (op2.ty == EoTy_MemU16Imm) { - ASSEMBLE_ONE(s_mov16_r_mi(op1.reg, op2.imm)); - } - } - if (op1.ty == EoTy_Mem8Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_mov8_mr_r(op1.reg, op1.offset, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mov8_mr_i(op1.reg, op1.offset, op2.imm)); - } - if (op1.ty == EoTy_Mem8Imm) { - if (op2.ty == EoTy_Reg) - 
ASSEMBLE_ONE(s_mov8_mi_r(op1.imm, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mov8_mi_i(op1.imm, op2.imm)); - } - if (op1.ty == EoTy_MemU16Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE( - s_mov16_mr_r(op1.reg, op1.offset, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE( - s_mov16_mr_i(op1.reg, op1.offset, op2.imm)); - } - if (op1.ty == EoTy_MemU16Imm) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_mov16_mi_r(op1.imm, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mov16_mi_i(op1.imm, op2.imm)); - } - } - break; - case M_in: - if (line->ops_size == 2) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - if (dst.ty == EoTy_Reg) { - if (op1.ty == EoTy_Reg) - ASSEMBLE_ONE(s_in_r(dst.reg, op1.reg)); - if (op1.ty == EoTy_Imm) - ASSEMBLE_ONE(s_in_i(dst.reg, op1.imm)); - } - } - break; - case M_out: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_out_r_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_out_r_i(op1.reg, op2.imm)); - } - if (op1.ty == EoTy_Imm) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_out_i_r(op1.imm, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_out_i_i(op1.imm, op2.imm)); - } - } - break; - case M_call: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Reg) - ASSEMBLE_ONE(s_call_r(op1.reg)); - if (op1.ty == EoTy_Imm) - ASSEMBLE_ONE(s_call_i(op1.imm)); - } - break; - case M_callf: - if (line->ops_size == 2) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op2); - if (op1.ty == EoTy_Reg) { - 
if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_callf_r_r(op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_callf_r_i(op1.reg, op2.imm)); - } - if (op1.ty == EoTy_Imm) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_callf_i_r(op1.imm, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_callf_i_i(op1.imm, op2.imm)); - } - } - break; - case M_ret: - if (line->ops_size == 0) { - ASSEMBLE_ONE(s_ret()); - } - break; - case M_retf: - if (line->ops_size == 0) { - ASSEMBLE_ONE(s_retf()); - } - break; - case M_lit: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Reg) - ASSEMBLE_ONE(s_lit_r(op1.reg)); - if (op1.ty == EoTy_Imm) - ASSEMBLE_ONE(s_lit_i(op1.imm)); - } - break; - case M_int: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Imm) { - if (op1.imm > 0xff) { - reporter_error_with_loc( - rep, "interrupt id exceeds 1 byte", line->loc); - return 0; - } - ASSEMBLE_ONE(s_int((uint8_t)op1.imm)); - } - } - break; - case M_iret: - if (line->ops_size == 0) { - ASSEMBLE_ONE(s_iret()); - } - break; - case M_or: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_or_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) { - ASSEMBLE_ONE(s_or_i(dst.reg, op1.reg, op2.imm)); - } - } - } - break; - case M_xor: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == 
EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_xor_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_xor_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_and: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_and_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_and_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_shl: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_shl_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_shl_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_rshl: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_rshl_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_rshl_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_shr: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = 
eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_shr_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_shr_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_rshr: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_rshr_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_rshr_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_add: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_add_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_add_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_sub: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_sub_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_sub_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_rsub: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = 
eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_rsub_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_rsub_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_mul: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_mul_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mul_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_imul: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_imul_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_imul_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_div: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_div_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_div_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_idiv: - if (line->ops_size == 3) { - EvaledOperand dst = 
eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_idiv_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_idiv_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_rdiv: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_rdiv_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_rdiv_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_ridiv: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_ridiv_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_ridiv_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_mod: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_mod_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_mod_i(dst.reg, op1.reg, op2.imm)); - } - } - 
break; - case M_rmod: - if (line->ops_size == 3) { - EvaledOperand dst = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(dst); - EvaledOperand op1 = eval_operand(evaluator, line->ops[1]); - CHECK_OPERAND(op1); - EvaledOperand op2 = eval_operand(evaluator, line->ops[2]); - CHECK_OPERAND(op2); - if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { - if (op2.ty == EoTy_Reg) - ASSEMBLE_ONE(s_rmod_r(dst.reg, op1.reg, op2.reg)); - if (op2.ty == EoTy_Imm) - ASSEMBLE_ONE(s_rmod_i(dst.reg, op1.reg, op2.imm)); - } - } - break; - case M_push: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Reg) { - uint16_t size = 0; - Line l; - l = s_add_i(Rsp, Rsp, 2); - size += assemble_line(object, &l); - l = s_mov16_mr_r(Rsp, 0, op1.reg); - size += assemble_line(object, &l); - return size; - } - if (op1.ty == EoTy_Imm) { - uint16_t size = 0; - Line l; - l = s_add_i(Rsp, Rsp, 2); - size += assemble_line(object, &l); - l = s_mov16_mr_i(Rsp, 0, op1.imm); - size += assemble_line(object, &l); - return size; - } - } - break; - case M_pop: - if (line->ops_size == 1) { - EvaledOperand op1 = eval_operand(evaluator, line->ops[0]); - CHECK_OPERAND(op1); - if (op1.ty == EoTy_Reg) { - uint16_t size = 0; - Line l; - l = s_mov16_r_mr(op1.reg, Rsp, 0); - size += assemble_line(object, &l); - l = s_sub_i(Rsp, Rsp, 2); - size += assemble_line(object, &l); - return size; - } - } - break; - } - reporter_error_with_loc(rep, "malformed instruction", line->loc); - return 0; - -#undef CHECK_OPERAND -#undef ASSEMBLE_ONE -} - typedef struct { const char* input_file; const char* output_file; diff --git a/asm/parse.c b/asm/parse.c new file mode 100644 index 0000000..51f111c --- /dev/null +++ b/asm/parse.c @@ -0,0 +1,534 @@ +#include "parse.h" +#include "lex.h" +#include "report.h" +#include "str.h" +#include +#include +#include +#include + +PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc) +{ + PLabel* label = 
malloc(sizeof(PLabel)); + *label = (PLabel) { next, ident, loc, sub_label }; + return label; +} + +void plabel_free(PLabel* label) +{ + if (!label) { + return; + } + plabel_free(label->next); + free(label->ident); + free(label); +} + +POperand* poperand_new_reg(Reg reg, Loc loc) +{ + POperand* operand = malloc(sizeof(POperand)); + *operand = (POperand) { .ty = PoTy_Reg, .loc = loc, .reg = reg }; + return operand; +} + +POperand* poperand_new_imm(uint16_t imm, Loc loc) +{ + POperand* operand = malloc(sizeof(POperand)); + *operand = (POperand) { .ty = PoTy_Imm, .loc = loc, .imm = imm }; + return operand; +} + +POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc loc) +{ + POperand* operand = malloc(sizeof(POperand)); + *operand = (POperand) { + .ty = ty, + .loc = loc, + .str = str, + .str_len = str_len, + }; + return operand; +} + +POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc) +{ + POperand* operand = malloc(sizeof(POperand)); + *operand = (POperand) { .ty = ty, .loc = loc, .operand = inner }; + return operand; +} + +POperand* poperand_new_binary( + POperandTy ty, POperand* left, POperand* right, Loc loc) +{ + POperand* operand = malloc(sizeof(POperand)); + *operand + = (POperand) { .ty = ty, .loc = loc, .left = left, .right = right }; + return operand; +} + +/* Recursively releases an operand tree: owned strings, child operands and + * the node itself. A NULL operand is a no-op (as with free() and + * plabel_free()), because failed sub-parses can leave NULL operands. */ +void poperand_free(POperand* operand) +{ + if (!operand) { + return; + } + switch (operand->ty) { + case PoTy_Reg: + case PoTy_Imm: + break; + case PoTy_Ident: + case PoTy_SubLabel: + case PoTy_Str: + free(operand->str); + break; + case PoTy_Mem8: + case PoTy_Mem16: + case PoTy_Not: + case PoTy_Negate: + poperand_free(operand->operand); + break; + case PoTy_Or: + case PoTy_Xor: + case PoTy_And: + case PoTy_Shl: + case PoTy_Shr: + case PoTy_Add: + case PoTy_Sub: + case PoTy_Mul: + case PoTy_Div: + case PoTy_Mod: + poperand_free(operand->left); + poperand_free(operand->right); + break; + } + free(operand); +} + +PLine* pline_new( + char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops) +{
+ PLine* line = malloc(sizeof(PLine) + sizeof(POperand*) * ops_size); + *line = (PLine) { + .labels = labels, + .op = op, + .loc = loc, + .ops_size = ops_size, + }; + for (size_t i = 0; i < ops_size; ++i) { + line->ops[i] = ops[i]; + } + return line; +} + +void pline_free(PLine* pline) +{ + plabel_free(pline->labels); + free(pline->op); + for (size_t i = 0; i < pline->ops_size; ++i) { + poperand_free(pline->ops[i]); + } + free(pline); +} + +void pstmt_free(PStmt* stmt) +{ + switch (stmt->ty) { + case PStmtTy_Line: + pline_free(stmt->line); + break; + case PStmtTy_Global: + case PStmtTy_Extern: + case PStmtTy_Define: + free(stmt->ident); + break; + } + free(stmt); +} + +/* Initializes `parser` over `text` and primes it with the first token. */ +void parser_construct(Parser* parser, const char* filename, const char* text) +{ + Lexer lexer; + lexer_construct(&lexer, filename, text); + /* Fetch the first token before building the Parser value: the + * expressions of an initializer list are indeterminately sequenced, so + * copying `lexer` and advancing it must not share one initializer. */ + Tok first = lexer_next(&lexer); + + *parser = (Parser) { + .lexer = lexer, + .tok = first, + .eaten = (Tok) { 0 }, + .error_occured = false, + }; +} + +bool parser_done(const Parser* parser) +{ + return parser->tok.ty == TT_Eof; +} + +bool parser_error_occured(const Parser* parser) +{ + return parser->error_occured || parser->lexer.error_occured; +} + +static inline void parser_step(Parser* parser) +{ + parser->tok = lexer_next(&parser->lexer); +} + +static inline bool parser_test(const Parser* parser, TokTy ty) +{ + return parser->tok.ty == ty; +} + +static inline bool parser_eat(Parser* parser, TokTy ty) +{ + if (parser_test(parser, ty)) { + parser->eaten = parser->tok; + parser_step(parser); + return true; + } + return false; +} + +static inline char* parser_ident_val(const Parser* parser, Tok tok) +{ + return asm_strndup(&parser->lexer.text[tok.loc.idx], tok.len); +} + +static inline void parser_report(Parser* parser, const char* msg, Loc loc) +{ + parser->error_occured = true; + REPORTF_ERROR("%s", msg); + print_report_loc(parser->lexer.filename, + parser->lexer.text, + parser->lexer.text_len, + loc); +} + +static inline void parser_skip_newlines(Parser* parser) +{ + while
(parser_eat(parser, '\n')) { } +} + +static inline PLabel* parser_parse_labels( + Parser* parser, char** ident, Loc* ident_loc) +{ + *ident = NULL; + PLabel* labels = NULL; + while (parser->tok.ty != TT_Eof && *ident == NULL) { + parser_skip_newlines(parser); + Loc loc = parser->tok.loc; + if (parser_eat(parser, '.')) { + if (!parser_eat(parser, TT_Ident)) { + parser_report(parser, "expected identifier", parser->tok.loc); + plabel_free(labels); + return NULL; + } + char* label_ident = parser_ident_val(parser, parser->eaten); + if (!parser_eat(parser, ':')) { + parser_report(parser, "expected ':'", parser->tok.loc); + plabel_free(labels); + free(label_ident); + return NULL; + } + labels = plabel_new(labels, label_ident, true, loc); + } else if (parser_eat(parser, TT_Ident)) { + *ident = parser_ident_val(parser, parser->eaten); + *ident_loc = loc; + if (!parser_eat(parser, ':')) { + break; + } + labels = plabel_new(labels, *ident, false, loc); + *ident = NULL; + } else { + parser_report( + parser, "expected identifier or ':'", parser->tok.loc); + plabel_free(labels); + return NULL; + } + } + return labels; +} + +static inline char literal_char_val(const char* str) +{ + if (str[0] == '\\') { + switch (str[1]) { + case '0': + return 0; + case 't': + return '\t'; + case 'n': + return '\n'; + default: + return str[1]; + } + } else { + return str[0]; + } +} + +static const int parser_binary_prec = 6; +static inline POperand* parser_parse_operand_2(Parser* parser, int prec); + +static inline POperand* parser_parse_operand_0(Parser* parser) +{ + Loc loc = parser->tok.loc; + if (parser_eat(parser, TT_Ident)) { + char* ident = parser_ident_val(parser, parser->eaten); + const char* reg_key[10] = { + "r0", "r1", "r2", "r3", "r4", "rbp", "rsp", "rfl", "rcs", "rip" + }; + Reg reg_val[10] = { R0, R1, R2, R3, R4, Rbp, Rsp, Rfl, Rcs, Rip }; + for (size_t i = 0; i < 10; ++i) { + if (strcmp(reg_key[i], ident) == 0) { + free(ident); + return poperand_new_reg(reg_val[i], loc); + } + } + 
return poperand_new_str(PoTy_Ident, ident, parser->eaten.len, loc); + } else if (parser_eat(parser, TT_Int)) { + char* str = parser_ident_val(parser, parser->eaten); + uint64_t val = strtoull(str, NULL, 10); + free(str); + if (val > 0xffff) { + /* Report at the literal itself; parser->tok is already the + * token that follows it. */ + parser_report(parser, + "integers larger than 65535 not supported", + loc); + return NULL; + } + uint16_t imm = (uint16_t)val; + return poperand_new_imm(imm, loc); + } else if (parser_eat(parser, TT_Binary)) { + char* str = parser_ident_val(parser, parser->eaten); + uint64_t val = strtoull(&str[2], NULL, 2); + free(str); + if (val > 0xffff) { + parser_report(parser, + "integers larger than 65535 not supported", + loc); + return NULL; + } + uint16_t imm = (uint16_t)val; + return poperand_new_imm(imm, loc); + } else if (parser_eat(parser, TT_Hex)) { + char* str = parser_ident_val(parser, parser->eaten); + uint64_t val = strtoull(&str[2], NULL, 16); + free(str); + if (val > 0xffff) { + parser_report(parser, + "integers larger than 65535 not supported", + loc); + return NULL; + } + uint16_t imm = (uint16_t)val; + return poperand_new_imm(imm, loc); + } else if (parser_eat(parser, TT_Char)) { + char* str = parser_ident_val(parser, parser->eaten); + uint16_t imm = (uint16_t)literal_char_val(&str[1]); + free(str); + return poperand_new_imm(imm, loc); + } else if (parser_eat(parser, TT_Str)) { + char* lit = parser_ident_val(parser, parser->eaten); + size_t lit_len = strlen(lit); + /* lit_len counts both quotes, so lit_len - 1 bytes hold every + * decoded character plus the terminating NUL left by calloc. */ + char* str = calloc(lit_len - 1, sizeof(char)); + size_t str_len = 0; + /* Decode everything between the quotes, appending at str_len and + * stepping over the second character of each escape sequence. */ + for (size_t i = 1; i + 1 < lit_len; ++i) { + str[str_len++] = literal_char_val(&lit[i]); + if (lit[i] == '\\') { + ++i; + } + } + free(lit); + return poperand_new_str(PoTy_Str, str, str_len, loc); + } else if (parser_eat(parser, '.')) { + if (!parser_eat(parser, TT_Ident)) { + parser_report(parser, "expected identifier", parser->tok.loc); + return NULL; + } + char* ident = parser_ident_val(parser, parser->eaten); + return poperand_new_str(PoTy_SubLabel, ident, parser->eaten.len, loc); + } else
if (parser_eat(parser, '(')) { + POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); + if (!operand) { + /* The inner parse has already reported the error. */ + return NULL; + } + if (!parser_eat(parser, ')')) { + parser_report(parser, "expected ')'", parser->tok.loc); + poperand_free(operand); + return NULL; + } + return operand; + } else { + parser_report(parser, "expected operand", parser->tok.loc); + return NULL; + } +} + +/* Parses prefix operators ('-' and '!') applied to a primary operand. + * Returns NULL if the inner operand failed to parse. */ +static inline POperand* parser_parse_operand_1(Parser* parser) +{ + + Loc loc = parser->tok.loc; + if (parser_eat(parser, '-')) { + POperand* operand = parser_parse_operand_1(parser); + if (!operand) { + return NULL; + } + return poperand_new_unary(PoTy_Negate, operand, loc); + } else if (parser_eat(parser, '!')) { + POperand* operand = parser_parse_operand_1(parser); + if (!operand) { + return NULL; + } + return poperand_new_unary(PoTy_Not, operand, loc); + } else { + return parser_parse_operand_0(parser); + } +} + +/* Parses a left-associative binary expression whose operators have + * precedence level <= prec (level 0 is a unary/primary operand). + * Returns NULL, releasing any partial tree, if a sub-operand fails. */ +static inline POperand* parser_parse_operand_2(Parser* parser, int prec) +{ + const POperandTy op_tys[] = { + PoTy_Or, + PoTy_Xor, + PoTy_And, + PoTy_Shr, + PoTy_Shl, + PoTy_Add, + PoTy_Sub, + PoTy_Mul, + PoTy_Div, + PoTy_Mod, + }; + const TokTy op_tts[] = { + '|', + '^', + '&', + TT_DoubleGt, + TT_DoubleLt, + '+', + '-', + '*', + '/', + '%', + }; + const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 }; + static_assert(sizeof(op_tys) / sizeof(op_tys[0]) + == sizeof(op_tts) / sizeof(op_tts[0]), + "misaligned"); + static_assert(sizeof(op_tys) / sizeof(op_tys[0]) + == sizeof(op_precs) / sizeof(op_precs[0]), + "misaligned"); + + if (prec == 0) { + return parser_parse_operand_1(parser); + } + POperand* left = parser_parse_operand_2(parser, prec - 1); + if (!left) { + /* Without this, a leading operator (e.g. "* 3") would read + * left->loc through a NULL pointer below. */ + return NULL; + } + bool should_continue = true; + while (should_continue) { + should_continue = false; + for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) { + if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) { + POperand* right = parser_parse_operand_2(parser, prec - 1); + if (!right) { + poperand_free(left); + return NULL; + } + left = poperand_new_binary(op_tys[i], left, right, left->loc); + should_continue = true; + break; + } + } + } + return left; +} + +static inline POperand*
parser_parse_operand_3(Parser* parser) +{ + Loc loc = parser->tok.loc; + if (parser_eat(parser, TT_LBracket)) { + parser_report(parser, "expected 'u8' or 'u16' before '['", loc); + return NULL; + } + if (!parser_test(parser, TT_Ident)) { + return parser_parse_operand_2(parser, parser_binary_prec); + } + char* ident = parser_ident_val(parser, parser->tok); + if (strcmp(ident, "u8") == 0) { + free(ident); + parser_step(parser); + if (!parser_eat(parser, '[')) { + parser_report(parser, "expected '['", parser->tok.loc); + return NULL; + } + POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); + if (!parser_eat(parser, ']')) { + parser_report(parser, "expected ']'", parser->tok.loc); + poperand_free(operand); + return NULL; + } + return poperand_new_unary(PoTy_Mem8, operand, loc); + } else if (strcmp(ident, "u16") == 0) { + free(ident); + parser_step(parser); + if (!parser_eat(parser, '[')) { + parser_report(parser, "expected '['", parser->tok.loc); + return NULL; + } + POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); + if (!parser_eat(parser, ']')) { + parser_report(parser, "expected ']'", parser->tok.loc); + poperand_free(operand); + return NULL; + } + return poperand_new_unary(PoTy_Mem16, operand, loc); + } else { + free(ident); + return parser_parse_operand_2(parser, parser_binary_prec); + } +} + +static inline void parser_skip_to_next_line(Parser* parser) +{ + while (!parser_done(parser) && !parser_eat(parser, TT_Newline)) { + parser_step(parser); + } +} + +PLine* parser_next(Parser* parser) +{ + char* ident; + Loc loc; + PLabel* labels = parser_parse_labels(parser, &ident, &loc); + + const size_t max_ops_size = 64; + // TODO: Move allocation out-of-band. 
+ /* Scratch array of operand pointers; sized by element, not by struct. */ + POperand** ops = malloc(sizeof *ops * max_ops_size); + size_t ops_size = 0; + + if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) { + POperand* operand = parser_parse_operand_3(parser); + if (!operand) { + parser_skip_to_next_line(parser); + goto error_free_ops; + } + ops[ops_size++] = operand; + /* Keep reading comma-separated operands until end of line; the + * only cap is max_ops_size, checked before each append. */ + while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) { + if (ops_size >= max_ops_size) { + parser_report(parser, + "exceeded maximum number of operands (64)", + parser->tok.loc); + parser_skip_to_next_line(parser); + goto error_free_ops; + } + if (!parser_eat(parser, ',')) { + parser_report(parser, "expected ','", parser->tok.loc); + parser_skip_to_next_line(parser); + goto error_free_ops; + } + POperand* operand = parser_parse_operand_3(parser); + if (!operand) { + parser_skip_to_next_line(parser); + goto error_free_ops; + } + ops[ops_size++] = operand; + } + } + if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) { + parser_report(parser, "expected newline", parser->tok.loc); + goto error_free_ops; + } + parser_skip_newlines(parser); + + PLine* line = pline_new(ident, labels, loc, ops_size, ops); + free(ops); + return line; + +error_free_ops: + for (size_t i = 0; i < ops_size; ++i) + if (ops[i]) + poperand_free(ops[i]); + free(ops); + plabel_free(labels); + free(ident); + return NULL; +} diff --git a/asm/parse.h b/asm/parse.h new file mode 100644 index 0000000..005c67a --- /dev/null +++ b/asm/parse.h @@ -0,0 +1,110 @@ +#pragma once + +#include "common/arch.h" +#include "lex.h" +#include "report.h" +#include +#include + +typedef struct PLabel PLabel; + +struct PLabel { + PLabel* next; + char* ident; + Loc loc; + bool sub_label; +}; + +PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc); +void plabel_free(PLabel* label); + +typedef enum { + PoTy_Reg, + PoTy_Imm, + PoTy_Ident, + PoTy_SubLabel, + PoTy_Str, + PoTy_Mem8, + PoTy_Mem16, + PoTy_Not, + PoTy_Negate, + PoTy_Or, + PoTy_Xor, + PoTy_And,
+ PoTy_Shl, + PoTy_Shr, + PoTy_Add, + PoTy_Sub, + PoTy_Mul, + PoTy_Div, + PoTy_Mod, +} POperandTy; + +typedef struct POperand POperand; + +struct POperand { + POperandTy ty; + Loc loc; + union { + Reg reg; + uint16_t imm; + struct { + char* str; + size_t str_len; + }; + POperand* operand; + struct { + POperand* left; + POperand* right; + }; + }; +}; + +POperand* poperand_new_reg(Reg reg, Loc loc); +POperand* poperand_new_imm(uint16_t imm, Loc loc); +POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc loc); +POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc); +POperand* poperand_new_binary( + POperandTy ty, POperand* left, POperand* right, Loc loc); +void poperand_free(POperand* operand); + +typedef struct { + PLabel* labels; + char* op; + Loc loc; + size_t ops_size; + POperand* ops[]; +} PLine; + +PLine* pline_new( + char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops); +void pline_free(PLine* pline); + +typedef enum { + PStmtTy_Line, + PStmtTy_Global, + PStmtTy_Extern, + PStmtTy_Define, +} PStmtTy; + +typedef struct { + PStmtTy ty; + union { + PLine* line; + char* ident; + }; +} PStmt; + +void pstmt_free(PStmt* stmt); + +typedef struct { + Lexer lexer; + Tok tok; + Tok eaten; + bool error_occured; +} Parser; + +void parser_construct(Parser* parser, const char* filename, const char* text); +bool parser_done(const Parser* parser); +bool parser_error_occured(const Parser* parser); +PLine* parser_next(Parser* parser); diff --git a/asm/report.c b/asm/report.c new file mode 100644 index 0000000..b6f5404 --- /dev/null +++ b/asm/report.c @@ -0,0 +1,44 @@ +#include "report.h" +#include + +void print_report_loc( + const char* filename, const char* text, size_t text_len, Loc loc) +{ + size_t line_start = loc.idx; + while (line_start > 0 && text[line_start] != '\n') { + line_start -= 1; + } + if (text[line_start] == '\n') { + line_start += 1; + } + size_t line_end = loc.idx + 1; + while (line_end < text_len && 
text[line_end] != '\n') { + line_end += 1; + } + const char* line = &text[line_start]; + int line_len = (int)line_end - (int)line_start; + + fprintf(stderr, + " \x1b[96m--> ./%s:%d:%d\n " + "\x1b[37m|\n\x1b[96m%5d\x1b[37m|%.*s\n " + "|%*c\x1b[1;91m^\x1b[0m\n", + filename, + loc.line, + loc.col, + loc.line, + line_len, + line, + loc.col - 1, + ' '); +} + +void reporter_print_loc(Reporter* rep, Loc loc) +{ + print_report_loc(rep->filename, rep->text, rep->text_len, loc); +} + +void reporter_error_with_loc(Reporter* rep, const char* msg, Loc loc) +{ + REPORTF_ERROR("%s", msg); + reporter_print_loc(rep, loc); +} diff --git a/asm/report.h b/asm/report.h new file mode 100644 index 0000000..4fd9e6e --- /dev/null +++ b/asm/report.h @@ -0,0 +1,31 @@ +#pragma once + +#include +#include + +typedef struct { + size_t idx; + int line; + int col; +} Loc; + +#define REPORTF_ERROR(FMT, ...) \ + (fprintf( \ + stderr, "\x1b[1;91merror\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) +#define REPORTF_INFO(FMT, ...) \ + (fprintf(stderr, "\x1b[1;96minfo\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) +#define REPORTF_WARNING(FMT, ...) 
\ + (fprintf( \ + stderr, "\x1b[1;93mwarning\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__)) + +void print_report_loc( + const char* filename, const char* text, size_t text_len, Loc loc); + +typedef struct { + const char* filename; + const char* text; + size_t text_len; +} Reporter; + +void reporter_print_loc(Reporter* rep, Loc loc); +void reporter_error_with_loc(Reporter* rep, const char* msg, Loc loc); diff --git a/asm/resolve.c b/asm/resolve.c new file mode 100644 index 0000000..39f5333 --- /dev/null +++ b/asm/resolve.c @@ -0,0 +1,92 @@ +#include "resolve.h" +#include +#include + +void ident_resol_destroy(IdentResol* resol) +{ + switch (resol->ty) { + case IdentResolTy_None: + break; + case IdentResolTy_Label: + case IdentResolTy_SubLabel: + free(resol->ident); + break; + } +} + +void ident_resolver_construct(IdentResolver* resolver) +{ + size_t capacity = 512; + *resolver = (IdentResolver) { + .resols = malloc(sizeof(IdentResol) * capacity), + .resols_capacity = capacity, + .resols_size = 0, + }; +} + +void ident_resolver_destroy(IdentResolver* resolver) +{ + for (size_t i = 0; i < resolver->resols_size; ++i) { + ident_resol_destroy(&resolver->resols[i]); + } + free(resolver->resols); +} + +static inline size_t ident_resolver_first_empty(IdentResolver* resolver) +{ + size_t i = 0; + for (; i < resolver->resols_size; ++i) { + if (resolver->resols[i].ty == IdentResolTy_None) { + break; + } + } + if (i >= resolver->resols_size) { + if (resolver->resols_size + 1 > resolver->resols_capacity) { + resolver->resols_capacity *= 2; + resolver->resols = realloc(resolver->resols, + sizeof(IdentResol) * resolver->resols_capacity); + } + resolver->resols_size += 1; + } + return i; +} + +void ident_resolver_define_label( + IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip) +{ + size_t i = ident_resolver_first_empty(resolver); + resolver->resols[i] = (IdentResol) { + .ident = ident, + .loc = loc, + .ty = IdentResolTy_Label, + .ip = asm_ip * 2, + }; + 
resolver->current_parent = &resolver->resols[i]; +} + +void ident_resolver_define_sublabel( + IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip) +{ + size_t i = ident_resolver_first_empty(resolver); + resolver->resols[i] = (IdentResol) { + .ident = ident, + .loc = loc, + .parent = resolver->current_parent, + .ty = IdentResolTy_SubLabel, + .ip = asm_ip * 2, + }; +} + +const IdentResol* ident_resolver_resolve( + const IdentResolver* resolver, const char* ident) +{ + for (size_t i = resolver->resols_size; i > 0; --i) { + IdentResol* re = &resolver->resols[i - 1]; + if (re->ty != IdentResolTy_None && strcmp(re->ident, ident) == 0 + && (re->ty != IdentResolTy_SubLabel + || re->parent == resolver->current_parent)) { + return re; + } + } + return NULL; +} diff --git a/asm/resolve.h b/asm/resolve.h new file mode 100644 index 0000000..0936ebb --- /dev/null +++ b/asm/resolve.h @@ -0,0 +1,40 @@ +#pragma once + +#include "report.h" +#include + +typedef enum { + IdentResolTy_None, + IdentResolTy_Label, + IdentResolTy_SubLabel, +} IdentResolTy; + +typedef struct IdentResol IdentResol; +struct IdentResol { + char* ident; + Loc loc; + const IdentResol* parent; + IdentResolTy ty; + union { + uint16_t ip; + }; +}; + +void ident_resol_destroy(IdentResol* resol); + +typedef struct IdentResolver IdentResolver; + +struct IdentResolver { + IdentResol* resols; + size_t resols_capacity; + size_t resols_size; + const IdentResol* current_parent; +}; +void ident_resolver_construct(IdentResolver* resolver); +void ident_resolver_destroy(IdentResolver* resolver); +void ident_resolver_define_label( + IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip); +void ident_resolver_define_sublabel( + IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip); +const IdentResol* ident_resolver_resolve( + const IdentResolver* resolver, const char* ident); diff --git a/asm/str.c b/asm/str.c new file mode 100644 index 0000000..1816c79 --- /dev/null +++ b/asm/str.c @@ -0,0 +1,28 
// ---- asm/str.c: small string helpers ----
// In the project tree this file pulls its declarations from "str.h"; that
// header's contents follow at the end of this block. The standard headers
// below are the ones the code needs (their names were missing in this copy).
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>
#include <string.h>

// Returns true when `ch` occurs in `str` before its terminating NUL.
// `str` must be a valid NUL-terminated string. The terminator itself is never
// reported as a match, so `str_includes(s, '\0')` is always false.
bool str_includes(const char* str, char ch)
{
    // strchr would match the terminator when asked for '\0'; rule that out
    // first to keep the helper's "characters of the string only" contract.
    return ch != '\0' && strchr(str, ch) != NULL;
}

// Returns a freshly allocated copy of the NUL-terminated string `str`, or
// NULL when the allocation fails. The caller owns the result and releases it
// with free().
char* asm_strdup(const char* str)
{
    size_t len = strlen(str);
    char* val = malloc(len + 1);
    if (val == NULL) {
        return NULL;
    }
    // Copy the characters together with the terminator.
    memcpy(val, str, len + 1);
    return val;
}

// Returns a freshly allocated, NUL-terminated copy of at most `len` leading
// characters of `str` (fewer if `str` ends earlier), or NULL when the
// allocation fails or `len + 1` is not representable. The buffer is
// `len + 1` zero-filled bytes. The caller owns the result and releases it
// with free().
char* asm_strndup(const char* str, size_t len)
{
    if (len == (size_t)-1) {
        // len + 1 would wrap around to 0 and under-allocate.
        return NULL;
    }
    char* val = calloc(len + 1, sizeof(char));
    if (val == NULL) {
        return NULL;
    }
    // Stop at the source terminator so nothing past the end of `str` is read.
    size_t count = 0;
    while (count < len && str[count] != '\0') {
        ++count;
    }
    // calloc zero-filled the buffer, so the copy is already terminated.
    memcpy(val, str, count);
    return val;
}

// ---- asm/str.h: public declarations for the helpers above ----
#pragma once

#include <stdbool.h>
#include <stddef.h>

bool str_includes(const char* str, char ch);
char* asm_strdup(const char* str);
char* asm_strndup(const char* str, size_t len);