From 9eb3922ad6c4d9239bbd4b3e873e2f33154a45ad Mon Sep 17 00:00:00 2001 From: sfja Date: Sat, 6 Sep 2025 02:01:16 +0200 Subject: [PATCH] add asm parser --- asm/main.c | 112 ++++++- asm/parse.c | 814 +++++++++++++++++++++++++++++++++++++++++++++++++++ asm/parse.h | 90 ++++++ asm/report.c | 38 +++ asm/report.h | 20 ++ 5 files changed, 1072 insertions(+), 2 deletions(-) create mode 100644 asm/parse.c create mode 100644 asm/parse.h create mode 100644 asm/report.c create mode 100644 asm/report.h diff --git a/asm/main.c b/asm/main.c index edfa8b2..a0baa2b 100644 --- a/asm/main.c +++ b/asm/main.c @@ -1,6 +1,114 @@ +#include "parse.h" +#include "report.h" +#include #include +#include +#include -int main(void) +typedef struct { + char* input_file; + char* output_file; +} Args; + +static int parse_args(Args* args, int argc, char** argv); + +static char* read_text_file(const char* filename); + +int main(int argc, char** argv) { - puts("asm"); + Args args; + if (parse_args(&args, argc, argv) != 0) { + return EXIT_FAILURE; + } + + char* text = read_text_file(args.input_file); + if (!text) { + return EXIT_FAILURE; + } + + Parser* parser = parser_new(args.input_file, text); + + size_t lines_capacity = 1024; + PLine** lines = malloc(sizeof(PLine*) * lines_capacity); + size_t lines_size = 0; + + while (!parser_done(parser)) { + if (parser_next_is_const(parser)) { + PConst* stmt = parser_parse_const(parser); + + } else if (parser_next_is_include(parser)) { + PInclude* stmt = parser_parse_include(parser); + + } else { + PLabel* label; + while ((label = parser_parse_label(parser)) != nullptr) { } + } + } + + return EXIT_SUCCESS; +} + +int parse_args(Args* args, int argc, char** argv) +{ + *args = (Args) { + .input_file = nullptr, + .output_file = nullptr, + }; + + int i = 1; + while (i < argc) { + if (strcmp(argv[i], "-o") == 0) { + i += 1; + if (i >= argc) { + fprintf(stderr, FMT_ERROR("expected filename after '-o'")); + return 1; + } + args->output_file = argv[i]; + i += 1; + } else if (argv[i][0] == '-') { + fprintf(stderr, FMT_ERROR("unrecognized argument '%s'"), argv[i]); + return 1; + } else { + if (args->input_file != nullptr) { + fprintf(stderr, FMT_ERROR("multiple input files")); + return 1; + } + args->input_file = argv[i]; + i += 1; + } + } + + if (!args->input_file) { + fprintf(stderr, FMT_ERROR("no input file")); + return 1; + } + + return 0; +} + +char* read_text_file(const char* filename) +{ + FILE* fp = fopen(filename, "r"); + if (!fp) { + fprintf(stderr, + FMT_ERROR("could not open file '%s' for reading: %s"), + filename, + strerror(errno)); + return NULL; + } + fseek(fp, 0L, SEEK_END); + size_t file_size = (size_t)ftell(fp); + rewind(fp); + + char* text = calloc(file_size + 1, sizeof(char)); + size_t bytes_read = fread(text, sizeof(char), file_size, fp); + fclose(fp); + if (bytes_read != file_size) { + fprintf(stderr, + FMT_ERROR("could not read input file '%s': %s"), + filename, + strerror(errno)); + return NULL; + } + return text; } diff --git a/asm/parse.c b/asm/parse.c new file mode 100644 index 0000000..15b3844 --- /dev/null +++ b/asm/parse.c @@ -0,0 +1,814 @@ +#include "parse.h" +#include "report.h" +#include +#include +#include +#include + +typedef enum { + TT_Err, + TT_Eof, + TT_Ident, + TT_Int, + TT_Binary, + TT_Hex, + TT_Char, + TT_Str, + TT_Newline = '\n', + TT_DoubleLt, + TT_DoubleGt, + TT_Pipe = '|', + TT_Hat = '^', + TT_Ampersand = '&', + TT_Plus = '+', + TT_Minus = '-', + TT_Asterisk = '*', + TT_Slash = '/', + TT_Percent = '%', + TT_LParen = '(', + TT_RParen = ')', + TT_LBracket = '[', + TT_RBracket = ']', + TT_Dot = '.', + TT_Comma = ',', + TT_Colon = ':', + TT_Exclamation = '!', +} TokTy; + +typedef struct { + TokTy ty; + Loc loc; + size_t len; +} Tok; + +typedef struct { + const char* filename; + const char* text; + size_t text_len; + size_t idx; + int line; + int col; + char ch; + bool error_occured; +} Lexer; + +static void lexer_init(Lexer* lexer, const char* filename, const char* text); +static Tok lexer_next(Lexer* lexer); + +static void lexer_report(Lexer* lexer, const char* msg, Loc loc); +static int lexer_skip_literal_char(Lexer* lexer); +static Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc); +static Loc lexer_loc(const Lexer* lexer); +static void lexer_step(Lexer* lexer); +static bool lexer_done(const Lexer* lexer); +static bool str_includes(const char* str, char ch); + +struct Parser { + Lexer lexer; + Tok tok; + Tok eaten; + bool error_occured; + + bool label_fail; + Tok last_ident_tok; +}; + +static PExpr* parser_parse_operand_3(Parser* parser); +static PExpr* parser_parse_operand_2(Parser* parser, int prec); +static PExpr* parser_parse_operand_1(Parser* parser); +static PExpr* parser_parse_operand_0(Parser* parser); +static void parser_skip_to_next_line(Parser* parser); +static void parser_report(Parser* parser, const char* msg, Loc loc); +static char literal_char_val(const char* str); +static char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok); +static char* parser_tok_strdup(const Parser* parser, Tok tok); +static bool parser_tok_streq(const Parser* parser, Tok tok, const char* text); +static bool parser_eat(Parser* parser, TokTy ty); +static bool parser_test(const Parser* parser, TokTy ty); +static void parser_step(Parser* parser); + +Parser* parser_new(const char* filename, const char* text) +{ + Parser* parser = malloc(sizeof(Parser)); + + *parser = (Parser) { + .lexer = {}, + .tok = {}, + .eaten = {}, + .error_occured = false, + + .label_fail = false, + .last_ident_tok = {}, + }; + + lexer_init(&parser->lexer, filename, text); + parser->tok = lexer_next(&parser->lexer); + + return parser; +} + +void parser_free(Parser* parser) +{ + free(parser); +} + +bool parser_next_is_const(Parser* parser) +{ + return parser_test(parser, TT_Ident) + && parser_tok_streq(parser, parser->tok, "const"); +} + +bool parser_next_is_include(Parser* parser) +{ + return parser_test(parser, TT_Ident) + && parser_tok_streq(parser, parser->tok, "include"); +} + +PConst* parser_parse_const(Parser* parser) +{ + + Loc loc = parser->tok.loc; + + parser_step(parser); + if (!parser_eat(parser, TT_Ident)) { + parser_report(parser, "expected identifier", parser->tok.loc); + return nullptr; + } + char* ident = parser_tok_strdup(parser, parser->eaten); + PExpr* value = parser_parse_operand_3(parser); + + PConst* stmt = malloc(sizeof(PConst)); + *stmt = (PConst) { loc, ident, value }; + return stmt; +} + +PInclude* parser_parse_include(Parser* parser) +{ + Loc loc = parser->tok.loc; + parser_step(parser); + if (!parser_eat(parser, TT_Str)) { + parser_report(parser, "expected string", parser->tok.loc); + return nullptr; + } + size_t str_len; + char* str = parser_str_val(parser, &str_len, parser->eaten); + + PInclude* stmt = malloc(sizeof(PInclude)); + *stmt = (PInclude) { loc, str }; + return stmt; +} + +PLabel* parser_parse_label(Parser* parser) +{ + if (parser->tok.ty == TT_Eof || parser->label_fail) + return nullptr; + + parser_skip_newlines(parser); + Loc loc = parser->tok.loc; + if (parser_eat(parser, '.')) { + if (!parser_eat(parser, TT_Ident)) { + parser_report(parser, "expected identifier", parser->tok.loc); + return nullptr; + } + char* ident = parser_tok_strdup(parser, parser->eaten); + if (!parser_eat(parser, ':')) { + parser_report(parser, "expected ':'", parser->tok.loc); + free(ident); + return nullptr; + } + PLabel* label = malloc(sizeof(PLabel)); + *label = (PLabel) { loc, ident, .local = true }; + return label; + } else if (parser_eat(parser, TT_Ident)) { + parser->last_ident_tok = parser->eaten; + if (!parser_eat(parser, ':')) { + parser->label_fail = true; + return nullptr; + } + char* ident = parser_tok_strdup(parser, parser->last_ident_tok); + + PLabel* label = malloc(sizeof(PLabel)); + *label = (PLabel) { loc, ident, .local = false }; + return label; + } else { + parser_report(parser, "expected identifier or ':'", parser->tok.loc); + return nullptr; + } +} + +PLine* parser_parse_line(Parser* parser) +{ + constexpr size_t max_ops_size = 2; + PExpr* ops[max_ops_size]; + size_t ops_size = 0; + + if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) { + PExpr* operand = parser_parse_operand_3(parser); + if (!operand) { + parser_skip_to_next_line(parser); + goto error_free_ops; + } + ops[ops_size++] = operand; + while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n') + && ops_size < 3) { + if (ops_size >= max_ops_size) { + parser_report(parser, + "exceeded maximum number of operands (64)", + parser->tok.loc); + parser_skip_to_next_line(parser); + goto error_free_ops; + } + if (!parser_eat(parser, ',')) { + parser_report(parser, "expected ','", parser->tok.loc); + parser_skip_to_next_line(parser); + goto error_free_ops; + } + PExpr* operand = parser_parse_operand_3(parser); + if (!operand) { + parser_skip_to_next_line(parser); + goto error_free_ops; + } + ops[ops_size++] = operand; + } + } + if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) { + parser_report(parser, "expected newline", parser->tok.loc); + goto error_free_ops; + } + parser_skip_newlines(parser); + + PLine* line = malloc(sizeof(PLine)); + *line = (PLine) { + parser->last_ident_tok.loc, + parser_tok_strdup(parser, parser->last_ident_tok), + .ops = {}, + .ops_size = ops_size, + }; + + for (size_t i = 0; i < ops_size; ++i) + line->ops[i] = ops[i]; + + return line; + +error_free_ops: + for (size_t i = 0; i < ops_size; ++i) + if (ops[i]) + pexpr_free(ops[i]); + return nullptr; +} + +static const int parser_binary_prec = 6; + +PExpr* parser_parse_operand_3(Parser* parser) +{ + Loc loc = parser->tok.loc; + if (parser_eat(parser, TT_LBracket)) { + parser_report(parser, "expected 'u8' or 'u16' before '['", loc); + return NULL; + } + if (!parser_test(parser, TT_Ident)) { + return parser_parse_operand_2(parser, parser_binary_prec); + } + if (parser_eat(parser, '[')) { + PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec); + if (!parser_eat(parser, ']')) { + parser_report(parser, "expected ']'", parser->tok.loc); + pexpr_free(operand); + return nullptr; + } + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Mem, + .loc = loc, + .operand = operand, + }; + return expr; + } else { + return parser_parse_operand_2(parser, parser_binary_prec); + } +} + +PExpr* parser_parse_operand_2(Parser* parser, int prec) +{ + const PExprTy op_tys[] = { + PExprTy_Or, + PExprTy_Xor, + PExprTy_And, + PExprTy_Shr, + PExprTy_Shl, + PExprTy_Add, + PExprTy_Sub, + PExprTy_Mul, + PExprTy_Div, + PExprTy_Mod, + }; + const TokTy op_tts[] = { + '|', + '^', + '&', + TT_DoubleGt, + TT_DoubleLt, + '+', + '-', + '*', + '/', + '%', + }; + const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 }; + static_assert(sizeof(op_tys) / sizeof(op_tys[0]) + == sizeof(op_tts) / sizeof(op_tts[0]), + "misaligned"); + static_assert(sizeof(op_tys) / sizeof(op_tys[0]) + == sizeof(op_precs) / sizeof(op_precs[0]), + "misaligned"); + + if (prec == 0) { + return parser_parse_operand_1(parser); + } + PExpr* left = parser_parse_operand_2(parser, prec - 1); + bool should_continue = true; + while (should_continue) { + should_continue = false; + for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) { + if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) { + PExpr* right = parser_parse_operand_2(parser, prec - 1); + + PExpr* new_left = malloc(sizeof(PExpr)); + *new_left = (PExpr) { + .ty = op_tys[i], + .loc = left->loc, + .left = left, + .right = right, + }; + left = new_left; + + should_continue = true; + break; + } + } + } + return left; +} + +PExpr* parser_parse_operand_1(Parser* parser) +{ + + Loc loc = parser->tok.loc; + if (parser_eat(parser, '-')) { + PExpr* operand = parser_parse_operand_1(parser); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Negate, + .loc = loc, + .operand = operand, + }; + return expr; + } else if (parser_eat(parser, '!')) { + PExpr* operand = parser_parse_operand_1(parser); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Not, + .loc = loc, + .operand = operand, + }; + return expr; + } else { + return parser_parse_operand_0(parser); + } +} + +PExpr* parser_parse_operand_0(Parser* parser) +{ + Loc loc = parser->tok.loc; + if (parser_eat(parser, TT_Ident)) { + char* ident = parser_tok_strdup(parser, parser->eaten); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Ident, + .loc = loc, + .str = ident, + }; + return expr; + } else if (parser_eat(parser, TT_Int)) { + char* str = parser_tok_strdup(parser, parser->eaten); + uint64_t val = strtoull(str, NULL, 10); + free(str); + if (val > 0xffff) { + parser_report(parser, + "integers larger than 65536 not supported", + parser->tok.loc); + return nullptr; + } + uint16_t imm = (uint16_t)val; + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Imm, + .loc = loc, + .imm = imm, + }; + return expr; + } else if (parser_eat(parser, TT_Binary)) { + char* str = parser_tok_strdup(parser, parser->eaten); + uint64_t val = strtoull(&str[2], NULL, 2); + free(str); + if (val > 0xffff) { + parser_report(parser, + "integers larger than 65536 not supported", + parser->tok.loc); + return NULL; + } + uint16_t imm = (uint16_t)val; + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Imm, + .loc = loc, + .imm = imm, + }; + return expr; + } else if (parser_eat(parser, TT_Hex)) { + char* str = parser_tok_strdup(parser, parser->eaten); + uint64_t val = strtoull(&str[2], NULL, 16); + free(str); + if (val > 0xffff) { + parser_report(parser, + "integers larger than 65536 not supported", + parser->tok.loc); + return NULL; + } + uint16_t imm = (uint16_t)val; + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Imm, + .loc = loc, + .imm = imm, + }; + return expr; + } else if (parser_eat(parser, TT_Char)) { + char* str = parser_tok_strdup(parser, parser->eaten); + uint16_t imm = (uint16_t)literal_char_val(&str[1]); + free(str); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Imm, + .loc = loc, + .imm = imm, + }; + return expr; + } else if (parser_eat(parser, TT_Str)) { + size_t str_len; + char* str = parser_str_val(parser, &str_len, parser->eaten); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_Str, + .loc = loc, + .str = str, + }; + return expr; + } else if (parser_eat(parser, '.')) { + if (!parser_eat(parser, TT_Ident)) { + parser_report(parser, "expected identifier", parser->tok.loc); + return NULL; + } + char* ident = parser_tok_strdup(parser, parser->eaten); + + PExpr* expr = malloc(sizeof(PExpr)); + *expr = (PExpr) { + .ty = PExprTy_SubLabel, + .loc = loc, + .str = ident, + }; + return expr; + } else if (parser_eat(parser, '(')) { + PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec); + if (!parser_eat(parser, ')')) { + parser_report(parser, "expected ')'", parser->tok.loc); + pexpr_free(operand); + return NULL; + } + return operand; + } else { + parser_report(parser, "expected operand", parser->tok.loc); + return NULL; + } +} + +void parser_skip_to_next_line(Parser* parser) +{ + while (!parser_done(parser) && !parser_eat(parser, TT_Newline)) { + parser_step(parser); + } +} + +bool parser_error_occured(const Parser* parser) +{ + return parser->error_occured || parser->lexer.error_occured; +} + +void parser_skip_newlines(Parser* parser) +{ + while (parser_eat(parser, '\n')) { } +} + +void parser_report(Parser* parser, const char* msg, Loc loc) +{ + parser->error_occured = true; + fprintf(stderr, FMT_ERROR("%s"), msg); + loc_pretty_print(loc, parser->lexer.text, parser->lexer.text_len); +} + +char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok) +{ + char* lit = parser_tok_strdup(parser, tok); + char* str = calloc(tok.len - 1, sizeof(char)); + *str_len = 0; + for (size_t i = 1; i < tok.len - 1; ++i) { + str[*str_len] = literal_char_val(&lit[i]); + *str_len += 1; + } + free(lit); + return str; +} + +char literal_char_val(const char* str) +{ + if (str[0] == '\\') { + switch (str[1]) { + case '0': + return 0; + case 't': + return '\t'; + case 'n': + return '\n'; + default: + return str[1]; + } + } else { + return str[0]; + } +} + +char* parser_tok_strdup(const Parser* parser, Tok tok) +{ + return strndup(&parser->lexer.text[tok.loc.idx], tok.len); +} + +bool parser_tok_streq(const Parser* parser, Tok tok, const char* text) +{ + return tok.len == strlen(text) + && strncmp(&parser->lexer.text[tok.loc.idx], text, tok.len) == 0; +} + +bool parser_eat(Parser* parser, TokTy ty) +{ + if (parser_test(parser, ty)) { + parser->eaten = parser->tok; + parser_step(parser); + return true; + } + return false; +} + +bool parser_test(const Parser* parser, TokTy ty) +{ + return parser->tok.ty == ty; +} + +void parser_step(Parser* parser) +{ + parser->tok = lexer_next(&parser->lexer); +} + +bool parser_done(const Parser* parser) +{ + return parser->tok.ty == TT_Eof; +} + +void lexer_init(Lexer* lexer, const char* filename, const char* text) +{ + *lexer = (Lexer) { + .filename = filename, + .text = text, + .text_len = strlen(text), + .idx = 0, + .line = 1, + .col = 1, + .ch = text[0], + .error_occured = false, + }; +} + +Tok lexer_next(Lexer* lexer) +{ + const char* ident_chars = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"; + const char* int_chars = "1234567890"; + const char* hex_chars = "01234567889abcdefABCDEF"; + + Loc loc = lexer_loc(lexer); + if (lexer_done(lexer)) { + return lexer_tok(lexer, TT_Eof, loc); + } + if (lexer->ch == '\n') { + lexer_step(lexer); + return lexer_tok(lexer, '\n', loc); + } else if (str_includes(" \t", lexer->ch)) { + while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) { + lexer_step(lexer); + } + return lexer_next(lexer); + } else if (str_includes(ident_chars, lexer->ch)) { + while (!lexer_done(lexer) + && (str_includes(ident_chars, lexer->ch) + || str_includes(int_chars, lexer->ch))) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Ident, loc); + } else if (str_includes(int_chars, lexer->ch) && lexer->ch != '0') { + while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Int, loc); + } else if (lexer->ch == ';') { + while (!lexer_done(lexer) && lexer->ch != '\n') { + lexer_step(lexer); + } + return lexer_next(lexer); + } else if (lexer->ch == '0') { + lexer_step(lexer); + if (lexer->ch == 'b') { + lexer_step(lexer); + if (lexer_done(lexer) || !str_includes("01", lexer->ch)) { + lexer_report(lexer, "malformed binary literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + while (!lexer_done(lexer) && str_includes("01", lexer->ch)) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Binary, loc); + } else if (lexer->ch == 'x') { + lexer_step(lexer); + if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) { + lexer_report(lexer, "malformed hex literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + while (!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) { + lexer_step(lexer); + } + return lexer_tok(lexer, TT_Hex, loc); + + } else { + return lexer_tok(lexer, TT_Int, loc); + } + } else if (lexer->ch == '\'') { + lexer_step(lexer); + lexer_skip_literal_char(lexer); + if (lexer_done(lexer) || lexer->ch != '\'') { + lexer_report(lexer, "malformed character literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + lexer_step(lexer); + return lexer_tok(lexer, TT_Char, loc); + } else if (lexer->ch == '"') { + lexer_step(lexer); + while (!lexer_done(lexer) && lexer->ch != '"') { + lexer_skip_literal_char(lexer); + } + if (lexer_done(lexer) || lexer->ch != '"') { + lexer_report(lexer, "malformed string literal", loc); + return lexer_tok(lexer, TT_Err, loc); + } + lexer_step(lexer); + return lexer_tok(lexer, TT_Str, loc); + } else if (lexer->ch == '<') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer->ch == '<') { + lexer_step(lexer); + return lexer_tok(lexer, TT_DoubleLt, loc); + } else { + lexer_report(lexer, "expected '<'", loc); + return lexer_tok(lexer, TT_Err, loc); + } + } else if (lexer->ch == '>') { + lexer_step(lexer); + if (!lexer_done(lexer) && lexer->ch == '>') { + lexer_step(lexer); + return lexer_tok(lexer, TT_DoubleGt, loc); + } else { + lexer_report(lexer, "expected '>'", loc); + return lexer_tok(lexer, TT_Err, loc); + } + } else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) { + char ch = lexer->ch; + lexer_step(lexer); + return lexer_tok(lexer, (TokTy)ch, loc); + } else { + lexer_report(lexer, "illegal character", loc); + lexer_step(lexer); + return lexer_tok(lexer, TT_Err, loc); + } +} + +int lexer_skip_literal_char(Lexer* lexer) +{ + char ch = lexer->ch; + lexer_step(lexer); + if (ch == '\\') { + if (lexer_done(lexer)) + return -1; + lexer_step(lexer); + } + return 0; +} + +void lexer_step(Lexer* lexer) +{ + if (lexer_done(lexer)) { + return; + } + if (lexer->ch == '\n') { + lexer->line += 1; + lexer->col = 1; + } else { + lexer->col += 1; + } + lexer->idx += 1; + lexer->ch = lexer->text[lexer->idx]; +} + +void lexer_report(Lexer* lexer, const char* msg, Loc loc) +{ + lexer->error_occured = true; + fprintf(stderr, FMT_ERROR("%s"), msg); + loc_pretty_print(loc, lexer->text, lexer->text_len); +} + +Loc lexer_loc(const Lexer* lexer) +{ + return (Loc) { + .filename = lexer->filename, + .idx = lexer->idx, + .line = lexer->line, + .col = lexer->col, + }; +} + +bool lexer_done(const Lexer* lexer) +{ + return lexer->idx >= lexer->text_len; +} + +Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc) +{ + return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx }; +} + +bool str_includes(const char* str, char ch) +{ + for (size_t i = 0; str[i] != '\0'; ++i) { + if (str[i] == ch) { + return true; + } + } + return false; +} + +void pexpr_free(PExpr* expr) +{ + switch (expr->ty) { + case PExprTy_Err: + case PExprTy_Imm: + break; + case PExprTy_Ident: + case PExprTy_SubLabel: + case PExprTy_Str: + free(expr->str); + break; + case PExprTy_Mem: + case PExprTy_Not: + case PExprTy_Negate: + pexpr_free(expr->operand); + break; + case PExprTy_Or: + case PExprTy_Xor: + case PExprTy_And: + case PExprTy_Shl: + case PExprTy_Shr: + case PExprTy_Add: + case PExprTy_Sub: + case PExprTy_Mul: + case PExprTy_Div: + case PExprTy_Mod: + pexpr_free(expr->left); + pexpr_free(expr->right); + break; + } + free(expr); +} diff --git a/asm/parse.h b/asm/parse.h new file mode 100644 index 0000000..b75f29f --- /dev/null +++ b/asm/parse.h @@ -0,0 +1,90 @@ +#ifndef PARSE_H +#define PARSE_H + +#include "report.h" +#include +#include + +typedef enum { + PExprTy_Err, + PExprTy_Ident, + PExprTy_SubLabel, + PExprTy_Imm, + PExprTy_Str, + PExprTy_Mem, + PExprTy_Not, + PExprTy_Negate, + PExprTy_Or, + PExprTy_Xor, + PExprTy_And, + PExprTy_Shl, + PExprTy_Shr, + PExprTy_Add, + PExprTy_Sub, + PExprTy_Mul, + PExprTy_Div, + PExprTy_Mod, +} PExprTy; + +typedef struct PExpr PExpr; + +struct PExpr { + PExprTy ty; + Loc loc; + union { + char* str; + uint16_t imm; + PExpr* operand; + struct { + PExpr* left; + PExpr* right; + }; + }; +}; + +void pexpr_free(PExpr* expr); + +typedef struct { + Loc loc; + char* ident; + PExpr* value; +} PConst; + +typedef struct { + Loc loc; + char* filename; +} PInclude; + +typedef struct { + Loc loc; + char* ident; + bool local; +} PLabel; + +typedef struct { + Loc loc; + char* ident; + PExpr* ops[2]; + size_t ops_size; +} PLine; + +typedef struct Parser Parser; + +Parser* parser_new(const char* filename, const char* text); +void parser_free(Parser* parser); + +void parser_skip_newlines(Parser* parser); + +bool parser_next_is_const(Parser* parser); +bool parser_next_is_include(Parser* parser); +bool parser_next_is_label(Parser* parser); + +PConst* parser_parse_const(Parser* parser); +PInclude* parser_parse_include(Parser* parser); +PLabel* parser_parse_label(Parser* parser); +PLine* parser_parse_line(Parser* parser); + +bool parser_done(const Parser* parser); +bool parser_error_occured(const Parser* parser); + +#endif diff --git a/asm/report.c b/asm/report.c new file mode 100644 index 0000000..adc67ed --- /dev/null +++ b/asm/report.c @@ -0,0 +1,38 @@ +#include "report.h" +#include + +void loc_pretty_print(Loc loc, const char* text, size_t text_len) +{ + const char* displacement_spaces + = " " + " " + " " + " "; + + size_t line_start = loc.idx; + while (line_start > 0 && text[line_start] != '\n') { + line_start -= 1; + } + if (text[line_start] == '\n') { + line_start += 1; + } + size_t line_end = loc.idx + 1; + while (line_end < text_len && text[line_end] != '\n') { + line_end += 1; + } + const char* line = &text[line_start]; + int line_len = (int)line_end - (int)line_start; + + fprintf(stderr, + " \x1b[96m--> ./%s:%d:%d\n " + "\x1b[37m|\n\x1b[96m%5d\x1b[37m|\x1b[0m%.*s\n " + "\x1b[37m|%.*s\x1b[1;91m^\x1b[0m\n", + loc.filename, + loc.line, + loc.col, + loc.line, + line_len, + line, + loc.col - 1, + displacement_spaces); +} diff --git a/asm/report.h b/asm/report.h new file mode 100644 index 0000000..08f2c99 --- /dev/null +++ b/asm/report.h @@ -0,0 +1,20 @@ +#ifndef REPORT_H +#define REPORT_H + +#include +#include + +#define FMT_ERROR(FMT) "\x1b[1;91merror\x1b[1;97m: " FMT "\x1b[0m\n" +#define FMT_WARNING(FMT) "\x1b[1;93mwarning\x1b[1;97m: " FMT "\x1b[0m\n" +#define FMT_INFO(FMT) "\x1b[1;96minfo\x1b[1;97m: " FMT "\x1b[0m\n" + +typedef struct { + const char* filename; + size_t idx; + int line; + int col; +} Loc; + +void loc_pretty_print(Loc loc, const char* text, size_t text_len); + +#endif