#include "common/arch.h" #include #include #include #include #include #include #include static inline bool str_includes(const char* str, char ch) { for (size_t i = 0; str[i] != '\0'; ++i) { if (str[i] == ch) { return true; } } return false; } static inline char* asm_strndup(const char* str, size_t len) { char* val = calloc(len + 1, sizeof(char)); strncpy(val, str, len); return val; } typedef struct { size_t idx; int line; int col; } Loc; #define REPORTF_INNER(FMT, ...) \ (fprintf(stderr, "error: " FMT "\n", __VA_ARGS__)) #define REPORTF(...) REPORTF_INNER(__VA_ARGS__) static inline void report_with_loc(const char* filename, const char* text, size_t text_len, const char* msg, Loc loc) { size_t line_start = loc.idx; while (line_start > 0 && text[line_start] != '\n') { line_start -= 1; } if (text[line_start] == '\n') { line_start += 1; } size_t line_end = loc.idx + 1; while (line_end < text_len && text[line_end] != '\n') { line_end += 1; } const char* line = &text[line_start]; int line_len = (int)line_end - (int)line_start; REPORTF("%s", msg); fprintf(stderr, " --> %s:%d:%d\n |\n%5d|%.*s\n |%*c^\n", filename, loc.line, loc.col, loc.line, line_len, line, loc.col - 1, ' '); } typedef enum { TT_Err, TT_Eof, TT_Ident, TT_Int, TT_Binary, TT_Hex, TT_Char, TT_Str, TT_Newline = '\n', TT_DoubleLt, TT_DoubleGt, TT_Pipe = '|', TT_Hat = '^', TT_Ampersand = '&', TT_Plus = '+', TT_Minus = '-', TT_Asterisk = '*', TT_Slash = '/', TT_Percent = '%', TT_LParen = '(', TT_RParen = ')', TT_LBracket = '[', TT_RBracket = ']', TT_Dot = '.', TT_Comma = ',', TT_Colon = ':', TT_Exclamation = '!', } TokTy; typedef struct { TokTy ty; Loc loc; size_t len; } Tok; typedef struct { const char* filename; const char* text; size_t text_len; size_t idx; int line; int col; char ch; bool error_occured; } Lexer; void lexer_construct(Lexer* lexer, const char* filename, const char* text) { *lexer = (Lexer) { .filename = filename, .text = text, .text_len = strlen(text), .idx = 0, .line = 1, .col = 1, .ch = text[0], .error_occured = false, }; } static inline bool lexer_done(const Lexer* lexer) { return lexer->idx >= lexer->text_len; } static inline void lexer_step(Lexer* lexer) { if (lexer_done(lexer)) { return; } if (lexer->ch == '\n') { lexer->line += 1; lexer->col = 1; } else { lexer->col += 1; } lexer->idx += 1; lexer->ch = lexer->text[lexer->idx]; } static inline Loc lexer_loc(const Lexer* lexer) { return (Loc) { .idx = lexer->idx, .line = lexer->line, .col = lexer->col }; } static inline Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc) { return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx }; } static inline int lexer_skip_literal_char(Lexer* lexer) { char ch = lexer->ch; lexer_step(lexer); if (ch == '\\') { if (lexer_done(lexer)) return -1; lexer_step(lexer); } return 0; } static inline void lexer_report(Lexer* lexer, const char* msg, Loc loc) { lexer->error_occured = true; report_with_loc(lexer->filename, lexer->text, lexer->text_len, msg, loc); } Tok lexer_next(Lexer* lexer) { const char* ident_chars = "abcdefghijklmnopqrstuvwxyz" "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; const char* int_chars = "1234567890"; const char* hex_chars = "01234567889abcdefABCDEF"; Loc loc = lexer_loc(lexer); if (lexer_done(lexer)) { return lexer_tok(lexer, TT_Eof, loc); } if (lexer->ch == '\n') { lexer_step(lexer); return lexer_tok(lexer, '\n', loc); } else if (str_includes(" \t", lexer->ch)) { while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) { lexer_step(lexer); } return lexer_next(lexer); } else if (str_includes(ident_chars, lexer->ch)) { while (!lexer_done(lexer) && (str_includes(ident_chars, lexer->ch) || str_includes(int_chars, lexer->ch))) { lexer_step(lexer); } return lexer_tok(lexer, TT_Ident, loc); } else if (str_includes(int_chars, lexer->ch) && lexer->ch != '0') { while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) { lexer_step(lexer); } return lexer_tok(lexer, TT_Int, loc); } else if (lexer->ch == ';') { while (!lexer_done(lexer) && lexer->ch != '\n') { lexer_step(lexer); } return lexer_next(lexer); } else if (lexer->ch == '0') { lexer_step(lexer); if (lexer->ch == 'b') { lexer_step(lexer); if (lexer_done(lexer) || !str_includes("01", lexer->ch)) { lexer_report(lexer, "malformed binary literal", loc); return lexer_tok(lexer, TT_Err, loc); } while (!lexer_done(lexer) && str_includes("01", lexer->ch)) { lexer_step(lexer); } return lexer_tok(lexer, TT_Binary, loc); } else if (lexer->ch == 'x') { lexer_step(lexer); if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) { lexer_report(lexer, "malformed hex literal", loc); return lexer_tok(lexer, TT_Err, loc); } while (!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) { lexer_step(lexer); } return lexer_tok(lexer, TT_Hex, loc); } else { return lexer_tok(lexer, TT_Int, loc); } } else if (lexer->ch == '\'') { lexer_step(lexer); lexer_skip_literal_char(lexer); if (lexer_done(lexer) || lexer->ch != '\'') { lexer_report(lexer, "malformed character literal", loc); return lexer_tok(lexer, TT_Err, loc); } lexer_step(lexer); return lexer_tok(lexer, TT_Char, loc); } else if (lexer->ch == '"') { lexer_step(lexer); while (!lexer_done(lexer) && lexer->ch != '"') { lexer_skip_literal_char(lexer); } if (lexer_done(lexer) || lexer->ch != '"') { lexer_report(lexer, "malformed string literal", loc); return lexer_tok(lexer, TT_Err, loc); } lexer_step(lexer); return lexer_tok(lexer, TT_Str, loc); } else if (lexer->ch == '<') { lexer_step(lexer); if (!lexer_done(lexer) && lexer->ch == '<') { lexer_step(lexer); return lexer_tok(lexer, TT_DoubleLt, loc); } else { lexer_report(lexer, "expected '<'", loc); return lexer_tok(lexer, TT_Err, loc); } } else if (lexer->ch == '>') { lexer_step(lexer); if (!lexer_done(lexer) && lexer->ch == '>') { lexer_step(lexer); return lexer_tok(lexer, TT_DoubleGt, loc); } else { lexer_report(lexer, "expected '>'", loc); return lexer_tok(lexer, TT_Err, loc); } } else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) { char ch = lexer->ch; lexer_step(lexer); return lexer_tok(lexer, (TokTy)ch, loc); } else { lexer_report(lexer, "illegal character", loc); lexer_step(lexer); return lexer_tok(lexer, TT_Err, loc); } } // typedef enum { // M_err, // M_d8, // M_d16, // M_nop, // M_hlt, // M_jmp, // M_jmpf, // M_jnz, // M_cmp, // M_mov, // M_in, // M_out, // M_call, // M_callf, // M_ret, // M_retf, // M_lit, // M_int, // M_iret, // } Mnemonic; typedef struct PLabel PLabel; struct PLabel { PLabel* next; char* ident; Loc loc; bool sub_label; }; PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc) { PLabel* label = malloc(sizeof(PLabel)); *label = (PLabel) { next, ident, loc, sub_label }; return label; } void plabel_free(PLabel* label) { if (!label) { return; } plabel_free(label->next); free(label->ident); free(label); } typedef enum { PoTy_Reg, PoTy_Imm, PoTy_Ident, PoTy_SubLabel, PoTy_Str, PoTy_MemU8, PoTy_MemU16, PoTy_Not, PoTy_Negate, PoTy_Or, PoTy_Xor, PoTy_And, PoTy_Shl, PoTy_Shr, PoTy_Add, PoTy_Sub, PoTy_Mul, PoTy_Div, PoTy_Mod, } POperandTy; typedef struct POperand POperand; struct POperand { POperandTy ty; Loc loc; union { Reg reg; uint16_t imm; char* str; POperand* operand; struct { POperand* left; POperand* right; }; }; }; POperand* poperand_new_reg(Reg reg, Loc loc) { POperand* operand = malloc(sizeof(POperand)); *operand = (POperand) { .ty = PoTy_Reg, .loc = loc, .reg = reg }; return operand; } POperand* poperand_new_imm(uint16_t imm, Loc loc) { POperand* operand = malloc(sizeof(POperand)); *operand = (POperand) { .ty = PoTy_Imm, .loc = loc, .imm = imm }; return operand; } POperand* poperand_new_str(POperandTy ty, char* str, Loc loc) { POperand* operand = malloc(sizeof(POperand)); *operand = (POperand) { .ty = ty, .loc = loc, .str = str }; return operand; } POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc) { POperand* operand = malloc(sizeof(POperand)); *operand = (POperand) { .ty = ty, .loc = loc, .operand = inner }; return operand; } POperand* poperand_new_binary( POperandTy ty, POperand* left, POperand* right, Loc loc) { POperand* operand = malloc(sizeof(POperand)); *operand = (POperand) { .ty = ty, .loc = loc, .left = left, .right = right }; return operand; } void poperand_free(POperand* operand) { switch (operand->ty) { case PoTy_Reg: case PoTy_Imm: break; case PoTy_Ident: case PoTy_SubLabel: case PoTy_Str: free(operand->str); break; case PoTy_MemU8: case PoTy_MemU16: case PoTy_Not: case PoTy_Negate: poperand_free(operand->operand); break; case PoTy_Or: case PoTy_Xor: case PoTy_And: case PoTy_Shl: case PoTy_Shr: case PoTy_Add: case PoTy_Sub: case PoTy_Mul: case PoTy_Div: case PoTy_Mod: poperand_free(operand->left); poperand_free(operand->right); break; } free(operand); } typedef struct { PLabel* labels; char* op; Loc loc; size_t ops_size; POperand* ops[]; } PLine; PLine* pline_new( char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops) { PLine* line = malloc(sizeof(PLine) + sizeof(POperand*) * ops_size); *line = (PLine) { .labels = labels, .op = op, .loc = loc, .ops_size = ops_size, }; for (size_t i = 0; i < ops_size; ++i) { line->ops[i] = ops[i]; } return line; } void pline_free(PLine* pline) { plabel_free(pline->labels); free(pline->op); for (size_t i = 0; i < pline->ops_size; ++i) { poperand_free(pline->ops[i]); } free(pline); } typedef struct { Lexer lexer; Tok tok; Tok eaten; bool error_occured; } Parser; void parser_construct(Parser* parser, const char* filename, const char* text) { Lexer lexer; lexer_construct(&lexer, filename, text); *parser = (Parser) { .lexer = lexer, .tok = lexer_next(&lexer), .eaten = (Tok) { 0 }, .error_occured = false, }; } bool parser_done(const Parser* parser) { return parser->tok.ty == TT_Eof; } bool parser_error_occured(const Parser* parser) { return parser->error_occured || parser->lexer.error_occured; } static inline void parser_step(Parser* parser) { parser->tok = lexer_next(&parser->lexer); } static inline bool parser_test(const Parser* parser, TokTy ty) { return parser->tok.ty == ty; } static inline bool parser_eat(Parser* parser, TokTy ty) { if (parser_test(parser, ty)) { parser->eaten = parser->tok; parser_step(parser); return true; } return false; } static inline char* parser_ident_val(const Parser* parser, Tok tok) { return asm_strndup(&parser->lexer.text[tok.loc.idx], tok.len); } static inline void parser_report(Parser* parser, const char* msg, Loc loc) { parser->error_occured = true; report_with_loc(parser->lexer.filename, parser->lexer.text, parser->lexer.text_len, msg, loc); } static inline void parser_skip_newlines(Parser* parser) { while (parser_eat(parser, '\n')) { } } static inline PLabel* parser_parse_labels( Parser* parser, char** ident, Loc* ident_loc) { *ident = NULL; PLabel* labels = NULL; while (parser->tok.ty != TT_Eof && *ident == NULL) { parser_skip_newlines(parser); Loc loc = parser->tok.loc; if (parser_eat(parser, '.')) { if (!parser_eat(parser, TT_Ident)) { parser_report(parser, "expected identifier", parser->tok.loc); plabel_free(labels); return NULL; } char* label_ident = parser_ident_val(parser, parser->eaten); if (!parser_eat(parser, ':')) { parser_report(parser, "expected ':'", parser->tok.loc); plabel_free(labels); free(label_ident); return NULL; } labels = plabel_new(labels, label_ident, true, loc); } else if (parser_eat(parser, TT_Ident)) { *ident = parser_ident_val(parser, parser->eaten); *ident_loc = loc; if (!parser_eat(parser, ':')) { break; } labels = plabel_new(labels, *ident, false, loc); *ident = NULL; } else { parser_report( parser, "expected identifier or ':'", parser->tok.loc); plabel_free(labels); return NULL; } } return labels; } static inline char literal_char_val(const char* str) { if (str[0] == '\\') { switch (str[1]) { case '0': return 0; case 't': return '\t'; case 'n': return '\n'; default: return str[1]; } } else { return str[0]; } } static const int parser_binary_prec = 6; static inline POperand* parser_parse_operand_2(Parser* parser, int prec); static inline POperand* parser_parse_operand_0(Parser* parser) { Loc loc = parser->tok.loc; if (parser_eat(parser, TT_Ident)) { char* ident = parser_ident_val(parser, parser->eaten); const char* reg_key[10] = { "r0", "r1", "r2", "r3", "r4", "rbp", "rsp", "rfl", "rcs", "rip" }; Reg reg_val[10] = { R0, R1, R2, R3, R4, Rbp, Rsp, Rfl, Rcs, Rip }; for (size_t i = 0; i < 10; ++i) { if (strcmp(reg_key[i], ident) == 0) { free(ident); return poperand_new_reg(reg_val[i], loc); } } return poperand_new_str(PoTy_Ident, ident, loc); } else if (parser_eat(parser, TT_Int)) { char* str = parser_ident_val(parser, parser->eaten); uint16_t imm = (uint16_t)strtoul(str, NULL, 10); free(str); return poperand_new_imm(imm, loc); } else if (parser_eat(parser, TT_Binary)) { char* str = parser_ident_val(parser, parser->eaten); uint16_t imm = (uint16_t)strtoul(&str[2], NULL, 2); free(str); return poperand_new_imm(imm, loc); } else if (parser_eat(parser, TT_Hex)) { char* str = parser_ident_val(parser, parser->eaten); uint16_t imm = (uint16_t)strtoul(&str[2], NULL, 16); free(str); return poperand_new_imm(imm, loc); } else if (parser_eat(parser, TT_Char)) { char* str = parser_ident_val(parser, parser->eaten); uint16_t imm = (uint16_t)literal_char_val(&str[1]); free(str); return poperand_new_imm(imm, loc); } else if (parser_eat(parser, TT_Str)) { char* lit = parser_ident_val(parser, parser->eaten); size_t lit_len = strlen(lit); char* str = calloc(lit_len - 1, sizeof(char)); size_t str_len = 0; for (size_t i = 1; i < lit_len - 2; ++i) { str[i] = literal_char_val(&lit[i]); } free(lit); return poperand_new_str(PoTy_Str, str, loc); } else if (parser_eat(parser, '.')) { if (!parser_eat(parser, TT_Ident)) { parser_report(parser, "expected identifier", parser->tok.loc); return NULL; } char* ident = parser_ident_val(parser, parser->eaten); return poperand_new_str(PoTy_SubLabel, ident, loc); } else if (parser_eat(parser, '(')) { POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); if (!parser_eat(parser, ')')) { parser_report(parser, "expected ')'", parser->tok.loc); poperand_free(operand); return NULL; } return operand; } else { parser_report(parser, "expected operand", parser->tok.loc); return NULL; } } static inline POperand* parser_parse_operand_1(Parser* parser) { Loc loc = parser->tok.loc; if (parser_eat(parser, '-')) { POperand* operand = parser_parse_operand_1(parser); return poperand_new_unary(PoTy_Negate, operand, loc); } else if (parser_eat(parser, '!')) { POperand* operand = parser_parse_operand_1(parser); return poperand_new_unary(PoTy_Not, operand, loc); } else { return parser_parse_operand_0(parser); } } static inline POperand* parser_parse_operand_2(Parser* parser, int prec) { const POperandTy op_tys[] = { PoTy_Or, PoTy_Xor, PoTy_And, PoTy_Shl, PoTy_Shr, PoTy_Add, PoTy_Sub, PoTy_Mul, PoTy_Div, PoTy_Mod, }; const TokTy op_tts[] = { '|', '^', '&', TT_DoubleGt, TT_DoubleLt, '+', '-', '*', '/', '%', }; const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 }; static_assert(sizeof(op_tys) / sizeof(op_tys[0]) == sizeof(op_tts) / sizeof(op_tts[0]), "misaligned"); static_assert(sizeof(op_tys) / sizeof(op_tys[0]) == sizeof(op_precs) / sizeof(op_precs[0]), "misaligned"); if (prec == 0) { return parser_parse_operand_1(parser); } POperand* left = parser_parse_operand_2(parser, prec - 1); bool should_continue = true; while (should_continue) { should_continue = false; for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) { if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) { POperand* right = parser_parse_operand_2(parser, prec - 1); left = poperand_new_binary(op_tys[0], left, right, left->loc); should_continue = true; break; } } } return left; } static inline POperand* parser_parse_operand_3(Parser* parser) { Loc loc = parser->tok.loc; if (parser_eat(parser, TT_LBracket)) { parser_report(parser, "expected 'u8' or 'u16'", loc); return NULL; } if (!parser_test(parser, TT_Ident)) { return parser_parse_operand_2(parser, parser_binary_prec); } char* ident = parser_ident_val(parser, parser->tok); if (strcmp(ident, "u8") == 0) { free(ident); parser_step(parser); if (!parser_eat(parser, '[')) { parser_report(parser, "expected '['", parser->tok.loc); return NULL; } POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); if (!parser_eat(parser, ']')) { parser_report(parser, "expected ']'", parser->tok.loc); poperand_free(operand); return NULL; } return poperand_new_unary(PoTy_MemU8, operand, loc); } else if (strcmp(ident, "u16") == 0) { free(ident); parser_step(parser); if (!parser_eat(parser, '[')) { parser_report(parser, "expected '['", parser->tok.loc); return NULL; } POperand* operand = parser_parse_operand_2(parser, parser_binary_prec); if (!parser_eat(parser, ']')) { parser_report(parser, "expected ']'", parser->tok.loc); poperand_free(operand); return NULL; } return poperand_new_unary(PoTy_MemU16, operand, loc); } else { free(ident); return parser_parse_operand_2(parser, parser_binary_prec); } } PLine* parser_next(Parser* parser) { char* ident; Loc loc; PLabel* labels = parser_parse_labels(parser, &ident, &loc); const size_t max_ops_size = 64; // TODO: Move allocation out-of-band. POperand** ops = malloc(sizeof(POperand) * max_ops_size); size_t ops_size = 0; if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) { POperand* operand = parser_parse_operand_3(parser); if (!operand) { goto error_free_ops; } ops[ops_size++] = operand; while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n') && ops_size < 3) { if (ops_size >= max_ops_size) { parser_report(parser, "exceeded maximum number of operands (64)", parser->tok.loc); goto error_free_ops; } if (!parser_eat(parser, ',')) { parser_report(parser, "expected ','", parser->tok.loc); goto error_free_ops; } POperand* operand = parser_parse_operand_3(parser); if (!operand) { goto error_free_ops; } ops[ops_size++] = operand; } } if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) { parser_report(parser, "expected newline", parser->tok.loc); goto error_free_ops; } parser_skip_newlines(parser); PLine* line = pline_new(ident, labels, loc, ops_size, ops); free(ops); return line; error_free_ops: for (size_t i = 0; i < ops_size; ++i) if (ops[i]) poperand_free(ops[i]); free(ops); plabel_free(labels); free(ident); return NULL; } typedef struct { const char* input_file; const char* output_file; } Args; static inline Args parse_args(int argc, char** argv); int main(int argc, char** argv) { Args args = parse_args(argc, argv); FILE* input_fp = fopen(args.input_file, "r"); if (!input_fp) { REPORTF("could not open input file '%s': %s", args.input_file, strerror(errno)); return -1; } fseek(input_fp, 0L, SEEK_END); size_t file_size = (size_t)ftell(input_fp); rewind(input_fp); char* input_text = calloc(file_size + 1, sizeof(char)); size_t bytes_read = fread(input_text, sizeof(char), file_size, input_fp); fclose(input_fp); if (bytes_read != file_size) { REPORTF("could not read input file'%s': %s", args.input_file, strerror(errno)); return -1; } Parser parser; parser_construct(&parser, args.input_file, input_text); while (!parser_done(&parser)) { PLine* line = parser_next(&parser); if (!line) { break; } pline_free(line); if (parser_error_occured(&parser)) { break; } } free(input_text); } static inline Args parse_args(int argc, char** argv) { const char* input_file = NULL; const char* output_file = NULL; for (int i = 1; i < argc; ++i) { if (strcmp(argv[i], "-o") == 0) { i += 1; if (i >= argc) { REPORTF("%s", "no filename given to -o"); exit(1); } output_file = argv[i]; } else { if (input_file != NULL) { REPORTF("%s", "multiple input files specified"); exit(1); } input_file = argv[i]; } } if (input_file == NULL) { REPORTF("%s", "no input file"); exit(1); } if (output_file == NULL) { output_file = "out.o"; } return (Args) { input_file, output_file, }; }