#include "parse.h"
#include "report.h"
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/**
 * Token kinds produced by the lexer.
 *
 * Single-character punctuation tokens use their own character code as the
 * enumerator value, so the lexer can cast a character straight to a TokTy and
 * the parser can pass a character literal where a TokTy is expected.
 */
typedef enum {
    TT_Err,
    TT_Eof,
    TT_Ident,
    TT_Int,
    TT_Binary,
    TT_Hex,
    TT_Char,
    TT_Str,
    TT_Newline = '\n',
    TT_DoubleLt,
    TT_DoubleGt,
    TT_Pipe = '|',
    TT_Hat = '^',
    TT_Ampersand = '&',
    TT_Plus = '+',
    TT_Minus = '-',
    TT_Asterisk = '*',
    TT_Slash = '/',
    TT_Percent = '%',
    TT_LParen = '(',
    TT_RParen = ')',
    TT_LBracket = '[',
    TT_RBracket = ']',
    TT_Dot = '.',
    TT_Comma = ',',
    TT_Colon = ':',
    TT_Exclamation = '!',
} TokTy;

/** A token: its kind, where it starts, and how many bytes of text it covers. */
typedef struct {
    TokTy ty;
    Loc loc;
    size_t len;
} Tok;

/** Lexer state: the source text plus the current position within it. */
typedef struct {
    const char* filename;
    const char* text;
    size_t text_len;
    size_t idx;
    int line;
    int col;
    char ch; // character at text[idx]; '\0' once idx reaches text_len
    bool error_occured;
} Lexer;

static void lexer_init(Lexer* lexer, const char* filename, const char* text);
static Tok lexer_next(Lexer* lexer);
static void lexer_report(Lexer* lexer, const char* msg, Loc loc);
static int lexer_skip_literal_char(Lexer* lexer);
static void lexer_skip_while(Lexer* lexer, const char* chars);
static Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc);
static Loc lexer_loc(const Lexer* lexer);
static void lexer_step(Lexer* lexer);
static bool lexer_done(const Lexer* lexer);

static bool str_includes(const char* str, char ch);

/**
 * Parser state.
 *
 * `tok` is the current (not yet consumed) token and `eaten` the token most
 * recently consumed by parser_eat(). `last_ident_tok` and `label_fail` carry
 * an identifier from parser_parse_label() over to parser_parse_line().
 */
struct Parser {
    Lexer lexer;
    Tok tok;
    Tok eaten;
    bool error_occured;
    bool label_fail;
    Tok last_ident_tok;
};

static PExpr* parser_parse_operand_3(Parser* parser);
static PExpr* parser_parse_operand_2(Parser* parser, int prec);
static PExpr* parser_parse_operand_1(Parser* parser);
static PExpr* parser_parse_operand_0(Parser* parser);
static PExpr* parser_parse_int_literal(
    Parser* parser, Tok tok, size_t prefix_len, int base);
static void parser_skip_to_next_line(Parser* parser);
static void parser_report(Parser* parser, const char* msg, Loc loc);
static char literal_char_val(const char* str);
static char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok);
static char* parser_tok_strdup(const Parser* parser, Tok tok);
static bool parser_tok_streq(const Parser* parser, Tok tok, const char* text);
static bool parser_eat(Parser* parser, TokTy ty);
static bool parser_test(const Parser* parser, TokTy ty);
static void parser_step(Parser* parser);
static void* parser_check_alloc(void* ptr);
static PExpr* parser_make_expr(PExpr init);

/**
 * Returns `ptr` unchanged, or reports the failure and aborts when `ptr` is
 * null. Wrapped around every allocation in this file so that running out of
 * memory stops the program instead of dereferencing a null pointer.
 */
static void* parser_check_alloc(void* ptr)
{
    if (!ptr) {
        fprintf(stderr, FMT_ERROR("%s"), "out of memory");
        abort();
    }
    return ptr;
}

/** Heap-allocates a PExpr holding a copy of `init`. Free with pexpr_free(). */
static PExpr* parser_make_expr(PExpr init)
{
    PExpr* expr = parser_check_alloc(malloc(sizeof *expr));
    *expr = init;
    return expr;
}

/**
 * Creates a parser over `text` and reads the first token.
 *
 * Neither `filename` nor `text` is copied; both pointers are stored and must
 * stay valid while the parser is used. Release with parser_free().
 */
Parser* parser_new(const char* filename, const char* text)
{
    Parser* parser = parser_check_alloc(malloc(sizeof *parser));
    *parser = (Parser) {
        .lexer = {},
        .tok = {},
        .eaten = {},
        .error_occured = false,
        .label_fail = false,
        .last_ident_tok = {},
    };
    lexer_init(&parser->lexer, filename, text);
    parser->tok = lexer_next(&parser->lexer);
    return parser;
}

/** Frees a parser created by parser_new(). */
void parser_free(Parser* parser)
{
    free(parser);
}

/** True when the current token is the identifier `const`. */
bool parser_next_is_const(Parser* parser)
{
    return parser_test(parser, TT_Ident)
        && parser_tok_streq(parser, parser->tok, "const");
}

/** True when the current token is the identifier `include`. */
bool parser_next_is_include(Parser* parser)
{
    return parser_test(parser, TT_Ident)
        && parser_tok_streq(parser, parser->tok, "include");
}

/**
 * Parses `const <ident> <expr>`; the current token is skipped as the keyword
 * without being inspected.
 *
 * Returns a heap-allocated PConst, or nullptr after an error has been
 * reported (missing identifier or an unparsable value expression).
 */
PConst* parser_parse_const(Parser* parser)
{
    Loc loc = parser->tok.loc;
    parser_step(parser);

    if (!parser_eat(parser, TT_Ident)) {
        parser_report(parser, "expected identifier", parser->tok.loc);
        return nullptr;
    }
    char* ident = parser_tok_strdup(parser, parser->eaten);

    PExpr* value = parser_parse_operand_3(parser);
    if (!value) {
        // The operand parser already reported the problem. Handing out a
        // PConst with a null value would only move the failure to
        // pconst_free().
        free(ident);
        return nullptr;
    }

    PConst* stmt = parser_check_alloc(malloc(sizeof *stmt));
    *stmt = (PConst) { loc, ident, value };
    return stmt;
}

/**
 * Parses `include "<path>"`; the current token is skipped as the keyword
 * without being inspected.
 *
 * Returns a heap-allocated PInclude owning the decoded string, or nullptr
 * after reporting "expected string".
 */
PInclude* parser_parse_include(Parser* parser)
{
    Loc loc = parser->tok.loc;
    parser_step(parser);

    if (!parser_eat(parser, TT_Str)) {
        parser_report(parser, "expected string", parser->tok.loc);
        return nullptr;
    }

    size_t str_len; // required by parser_str_val(); not otherwise used here
    char* str = parser_str_val(parser, &str_len, parser->eaten);

    PInclude* stmt = parser_check_alloc(malloc(sizeof *stmt));
    *stmt = (PInclude) { loc, str };
    return stmt;
}

/**
 * Tries to parse one label: `.name:` (local) or `name:`.
 *
 * Returns nullptr without consuming anything at end of input or while
 * `label_fail` is set. When an identifier is consumed but no ':' follows, the
 * identifier is kept in `last_ident_tok`, `label_fail` is set and nullptr is
 * returned without reporting an error; parser_parse_line() then uses that
 * identifier as the line's name. Other failures are reported before nullptr
 * is returned.
 */
PLabel* parser_parse_label(Parser* parser)
{
    if (parser->tok.ty == TT_Eof || parser->label_fail) {
        return nullptr;
    }
    parser_skip_newlines(parser);

    Loc loc = parser->tok.loc;

    if (parser_eat(parser, '.')) {
        if (!parser_eat(parser, TT_Ident)) {
            parser_report(parser, "expected identifier", parser->tok.loc);
            return nullptr;
        }
        char* ident = parser_tok_strdup(parser, parser->eaten);
        if (!parser_eat(parser, ':')) {
            parser_report(parser, "expected ':'", parser->tok.loc);
            free(ident);
            return nullptr;
        }
        PLabel* label = parser_check_alloc(malloc(sizeof *label));
        *label = (PLabel) { loc, ident, .local = true };
        return label;
    }

    if (parser_eat(parser, TT_Ident)) {
        parser->last_ident_tok = parser->eaten;
        if (!parser_eat(parser, ':')) {
            // Not a label: leave the identifier for parser_parse_line().
            parser->label_fail = true;
            return nullptr;
        }
        char* ident = parser_tok_strdup(parser, parser->last_ident_tok);
        PLabel* label = parser_check_alloc(malloc(sizeof *label));
        *label = (PLabel) { loc, ident, .local = false };
        return label;
    }

    parser_report(parser, "expected identifier or ':'", parser->tok.loc);
    return nullptr;
}

/**
 * Parses the comma-separated operands of one line up to the newline or end of
 * input, then skips any further newlines.
 *
 * The line's name and location come from `last_ident_tok`, i.e. the
 * identifier parser_parse_label() consumed last. Returns a heap-allocated
 * PLine, or nullptr after an error has been reported; on operand errors the
 * rest of the line is skipped.
 */
PLine* parser_parse_line(Parser* parser)
{
    constexpr size_t max_ops_size = 2;
    PExpr* ops[max_ops_size];
    size_t ops_size = 0;

    // The identifier that made parser_parse_label() give up belongs to this
    // line. Clear the flag so labels on the following lines are parsed
    // again; nothing else in this file resets it.
    parser->label_fail = false;

    while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
        if (ops_size >= max_ops_size) {
            // Keep the number in the message in sync with max_ops_size.
            parser_report(parser, "exceeded maximum number of operands (2)",
                parser->tok.loc);
            goto error_skip_line;
        }
        // Every operand after the first must be preceded by a comma.
        if (ops_size > 0 && !parser_eat(parser, ',')) {
            parser_report(parser, "expected ','", parser->tok.loc);
            goto error_skip_line;
        }
        PExpr* operand = parser_parse_operand_3(parser);
        if (!operand) {
            goto error_skip_line;
        }
        ops[ops_size++] = operand;
    }

    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
        parser_report(parser, "expected newline", parser->tok.loc);
        goto error_free_ops;
    }
    parser_skip_newlines(parser);

    PLine* line = parser_check_alloc(malloc(sizeof *line));
    *line = (PLine) {
        parser->last_ident_tok.loc,
        parser_tok_strdup(parser, parser->last_ident_tok),
        .ops = {},
        .ops_size = ops_size,
    };
    for (size_t i = 0; i < ops_size; ++i) {
        line->ops[i] = ops[i];
    }
    return line;

error_skip_line:
    parser_skip_to_next_line(parser);
error_free_ops:
    for (size_t i = 0; i < ops_size; ++i) {
        pexpr_free(ops[i]);
    }
    return nullptr;
}

/** Loosest binary precedence level; see parser_parse_operand_2(). */
static const int parser_binary_prec = 6;

/**
 * Parses a full operand.
 *
 * A leading '[' is rejected with "expected 'u8' or 'u16' before '['"; every
 * other input is parsed as a binary expression. Returns nullptr after an
 * error has been reported.
 */
static PExpr* parser_parse_operand_3(Parser* parser)
{
    Loc loc = parser->tok.loc;

    if (parser_eat(parser, TT_LBracket)) {
        parser_report(parser, "expected 'u8' or 'u16' before '['", loc);
        return nullptr;
    }
    if (!parser_test(parser, TT_Ident)) {
        return parser_parse_operand_2(parser, parser_binary_prec);
    }

    // NOTE(review): the current token is an identifier at this point, so this
    // '[' test can never succeed and PExprTy_Mem nodes are never produced.
    // Presumably a 'u8'/'u16' identifier was meant to be consumed first;
    // confirm the intended syntax before relying on this branch.
    if (parser_eat(parser, '[')) {
        PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec);
        if (!operand) {
            return nullptr;
        }
        if (!parser_eat(parser, ']')) {
            parser_report(parser, "expected ']'", parser->tok.loc);
            pexpr_free(operand);
            return nullptr;
        }
        return parser_make_expr((PExpr) {
            .ty = PExprTy_Mem,
            .loc = loc,
            .operand = operand,
        });
    }

    return parser_parse_operand_2(parser, parser_binary_prec);
}

/**
 * Parses left-associative binary expressions by precedence climbing.
 *
 * Level 0 is a unary expression. Higher levels bind more loosely:
 * 1 = `* / %`, 2 = `+ -`, 3 = `<< >>`, 4 = `&`, 5 = `^`, 6 = `|`.
 * Returns nullptr when any operand fails to parse; the operand parser has
 * reported the error by then and any partially built tree is freed.
 */
static PExpr* parser_parse_operand_2(Parser* parser, int prec)
{
    static const PExprTy op_tys[] = {
        PExprTy_Or, PExprTy_Xor, PExprTy_And, PExprTy_Shr, PExprTy_Shl,
        PExprTy_Add, PExprTy_Sub, PExprTy_Mul, PExprTy_Div, PExprTy_Mod,
    };
    static const TokTy op_tts[] = {
        '|', '^', '&', TT_DoubleGt, TT_DoubleLt, '+', '-', '*', '/', '%',
    };
    static const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 };
    constexpr size_t op_count = sizeof(op_tys) / sizeof(op_tys[0]);

    static_assert(op_count == sizeof(op_tts) / sizeof(op_tts[0]), "misaligned");
    static_assert(
        op_count == sizeof(op_precs) / sizeof(op_precs[0]), "misaligned");

    if (prec == 0) {
        return parser_parse_operand_1(parser);
    }

    PExpr* left = parser_parse_operand_2(parser, prec - 1);
    if (!left) {
        return nullptr;
    }

    for (;;) {
        // Find an operator of this level (or tighter) at the current token.
        size_t matched = op_count;
        for (size_t i = 0; i < op_count; ++i) {
            if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) {
                matched = i;
                break;
            }
        }
        if (matched == op_count) {
            break;
        }

        PExpr* right = parser_parse_operand_2(parser, prec - 1);
        if (!right) {
            pexpr_free(left);
            return nullptr;
        }
        left = parser_make_expr((PExpr) {
            .ty = op_tys[matched],
            .loc = left->loc,
            .left = left,
            .right = right,
        });
    }
    return left;
}

/**
 * Parses prefix `-` (PExprTy_Negate) and `!` (PExprTy_Not), which may be
 * stacked, followed by a primary operand. Returns nullptr when the operand
 * fails to parse.
 */
static PExpr* parser_parse_operand_1(Parser* parser)
{
    Loc loc = parser->tok.loc;

    PExprTy ty;
    if (parser_eat(parser, '-')) {
        ty = PExprTy_Negate;
    } else if (parser_eat(parser, '!')) {
        ty = PExprTy_Not;
    } else {
        return parser_parse_operand_0(parser);
    }

    PExpr* operand = parser_parse_operand_1(parser);
    if (!operand) {
        return nullptr;
    }
    return parser_make_expr((PExpr) {
        .ty = ty,
        .loc = loc,
        .operand = operand,
    });
}

/**
 * Converts an already consumed integer token into a PExprTy_Imm node.
 *
 * `prefix_len` is the number of leading bytes to skip ("0b"/"0x") and `base`
 * the radix passed to strtoull(). Values above 0xffff are reported at the
 * literal's location and yield nullptr; strtoull() saturates on overflow, so
 * arbitrarily long literals are rejected the same way.
 */
static PExpr* parser_parse_int_literal(
    Parser* parser, Tok tok, size_t prefix_len, int base)
{
    char* str = parser_tok_strdup(parser, tok);
    unsigned long long val = strtoull(&str[prefix_len], nullptr, base);
    free(str);

    if (val > 0xffff) {
        parser_report(
            parser, "integers larger than 65535 not supported", tok.loc);
        return nullptr;
    }
    return parser_make_expr((PExpr) {
        .ty = PExprTy_Imm,
        .loc = tok.loc,
        .imm = (uint16_t)val,
    });
}

/**
 * Parses a primary operand: identifier, decimal/binary/hex integer, character
 * literal, string literal, `.name` sub-label, or a parenthesised expression.
 * Returns nullptr after an error has been reported.
 */
static PExpr* parser_parse_operand_0(Parser* parser)
{
    Loc loc = parser->tok.loc;

    if (parser_eat(parser, TT_Ident)) {
        return parser_make_expr((PExpr) {
            .ty = PExprTy_Ident,
            .loc = loc,
            .str = parser_tok_strdup(parser, parser->eaten),
        });
    }
    if (parser_eat(parser, TT_Int)) {
        return parser_parse_int_literal(parser, parser->eaten, 0, 10);
    }
    if (parser_eat(parser, TT_Binary)) {
        return parser_parse_int_literal(parser, parser->eaten, 2, 2);
    }
    if (parser_eat(parser, TT_Hex)) {
        return parser_parse_int_literal(parser, parser->eaten, 2, 16);
    }
    if (parser_eat(parser, TT_Char)) {
        char* str = parser_tok_strdup(parser, parser->eaten);
        // Go through unsigned char so bytes >= 0x80 are not sign-extended
        // into the upper half of the 16-bit immediate.
        uint16_t imm = (uint16_t)(unsigned char)literal_char_val(&str[1]);
        free(str);
        return parser_make_expr((PExpr) {
            .ty = PExprTy_Imm,
            .loc = loc,
            .imm = imm,
        });
    }
    if (parser_eat(parser, TT_Str)) {
        size_t str_len; // required by parser_str_val(); the node keeps no length
        char* str = parser_str_val(parser, &str_len, parser->eaten);
        return parser_make_expr((PExpr) {
            .ty = PExprTy_Str,
            .loc = loc,
            .str = str,
        });
    }
    if (parser_eat(parser, '.')) {
        if (!parser_eat(parser, TT_Ident)) {
            parser_report(parser, "expected identifier", parser->tok.loc);
            return nullptr;
        }
        return parser_make_expr((PExpr) {
            .ty = PExprTy_SubLabel,
            .loc = loc,
            .str = parser_tok_strdup(parser, parser->eaten),
        });
    }
    if (parser_eat(parser, '(')) {
        PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec);
        if (!operand) {
            return nullptr;
        }
        if (!parser_eat(parser, ')')) {
            parser_report(parser, "expected ')'", parser->tok.loc);
            pexpr_free(operand);
            return nullptr;
        }
        return operand;
    }

    parser_report(parser, "expected operand", parser->tok.loc);
    return nullptr;
}

/** Discards tokens up to and including the next newline, or up to EOF. */
static void parser_skip_to_next_line(Parser* parser)
{
    while (!parser_done(parser) && !parser_eat(parser, TT_Newline)) {
        parser_step(parser);
    }
}

/** True once either the parser or its lexer has reported an error. */
bool parser_error_occured(const Parser* parser)
{
    return parser->error_occured || parser->lexer.error_occured;
}

/** Consumes consecutive newline tokens. */
void parser_skip_newlines(Parser* parser)
{
    while (parser_eat(parser, '\n')) { }
}

/** Marks the parser as failed and prints `msg` with the source excerpt at `loc`. */
static void parser_report(Parser* parser, const char* msg, Loc loc)
{
    parser->error_occured = true;
    fprintf(stderr, FMT_ERROR("%s"), msg);
    loc_pretty_print(loc, parser->lexer.text, parser->lexer.text_len);
}

/**
 * Decodes the body of the string-literal token `tok` (quotes removed, escapes
 * resolved by literal_char_val()).
 *
 * Returns a newly allocated, NUL-terminated buffer owned by the caller and
 * stores the decoded length in `*str_len`.
 */
static char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok)
{
    // Bytes between the two quotes; guarded so a short token cannot underflow.
    size_t body_len = tok.len >= 2 ? tok.len - 2 : 0;

    char* lit = parser_tok_strdup(parser, tok);
    char* str = parser_check_alloc(calloc(body_len + 1, sizeof(char)));

    size_t len = 0;
    for (size_t i = 1; i <= body_len; ++i) {
        str[len++] = literal_char_val(&lit[i]);
        if (lit[i] == '\\') {
            // An escape is two source bytes but one decoded byte; without
            // this skip the escaped character would be emitted a second time.
            ++i;
        }
    }
    *str_len = len;

    free(lit);
    return str;
}

/**
 * Decodes the (possibly escaped) character at the start of `str`.
 * `\0`, `\t` and `\n` map to their control characters; any other escaped
 * character stands for itself.
 */
static char literal_char_val(const char* str)
{
    if (str[0] != '\\') {
        return str[0];
    }
    switch (str[1]) {
    case '0':
        return 0;
    case 't':
        return '\t';
    case 'n':
        return '\n';
    default:
        return str[1];
    }
}

/** Returns a newly allocated copy of the text of `tok`; the caller frees it. */
static char* parser_tok_strdup(const Parser* parser, Tok tok)
{
    return parser_check_alloc(
        strndup(&parser->lexer.text[tok.loc.idx], tok.len));
}

/** True when the text of `tok` is exactly `text`. */
static bool parser_tok_streq(const Parser* parser, Tok tok, const char* text)
{
    return tok.len == strlen(text)
        && strncmp(&parser->lexer.text[tok.loc.idx], text, tok.len) == 0;
}

/**
 * If the current token has kind `ty`, records it in `eaten`, advances and
 * returns true; otherwise leaves the parser untouched and returns false.
 */
static bool parser_eat(Parser* parser, TokTy ty)
{
    if (!parser_test(parser, ty)) {
        return false;
    }
    parser->eaten = parser->tok;
    parser_step(parser);
    return true;
}

/** True when the current token has kind `ty`. */
static bool parser_test(const Parser* parser, TokTy ty)
{
    return parser->tok.ty == ty;
}

/** Advances to the next token. */
static void parser_step(Parser* parser)
{
    parser->tok = lexer_next(&parser->lexer);
}

/** True when the current token is end of input. */
bool parser_done(const Parser* parser)
{
    return parser->tok.ty == TT_Eof;
}

/** Positions the lexer at the start of `text` (line 1, column 1). */
static void lexer_init(Lexer* lexer, const char* filename, const char* text)
{
    *lexer = (Lexer) {
        .filename = filename,
        .text = text,
        .text_len = strlen(text),
        .idx = 0,
        .line = 1,
        .col = 1,
        .ch = text[0],
        .error_occured = false,
    };
}

/** Advances while the current character is one of `chars`. */
static void lexer_skip_while(Lexer* lexer, const char* chars)
{
    while (!lexer_done(lexer) && str_includes(chars, lexer->ch)) {
        lexer_step(lexer);
    }
}

/**
 * Scans and returns the next token.
 *
 * Spaces, tabs and `;` comments (which run to the end of the line) are
 * skipped; newlines are tokens. Malformed input is reported through
 * lexer_report() and yields a TT_Err token.
 */
static Tok lexer_next(Lexer* lexer)
{
    static const char ident_start_chars[]
        = "abcdefghijklmnopqrstuvwxyz"
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$";
    static const char ident_chars[]
        = "abcdefghijklmnopqrstuvwxyz"
          "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$"
          "1234567890";
    static const char int_chars[] = "1234567890";
    static const char hex_chars[] = "0123456789abcdefABCDEF";

    // Skip blanks and comments before recording where the token starts.
    while (!lexer_done(lexer)) {
        if (str_includes(" \t", lexer->ch)) {
            lexer_skip_while(lexer, " \t");
        } else if (lexer->ch == ';') {
            while (!lexer_done(lexer) && lexer->ch != '\n') {
                lexer_step(lexer);
            }
        } else {
            break;
        }
    }

    Loc loc = lexer_loc(lexer);
    if (lexer_done(lexer)) {
        return lexer_tok(lexer, TT_Eof, loc);
    }

    char first = lexer->ch;

    if (first == '\n') {
        lexer_step(lexer);
        return lexer_tok(lexer, '\n', loc);
    }

    if (str_includes(ident_start_chars, first)) {
        lexer_skip_while(lexer, ident_chars);
        return lexer_tok(lexer, TT_Ident, loc);
    }

    if (first != '0' && str_includes(int_chars, first)) {
        lexer_skip_while(lexer, int_chars);
        return lexer_tok(lexer, TT_Int, loc);
    }

    if (first == '0') {
        // Either a "0b"/"0x" prefixed literal or the single digit 0.
        lexer_step(lexer);
        if (lexer->ch == 'b') {
            lexer_step(lexer);
            if (lexer_done(lexer) || !str_includes("01", lexer->ch)) {
                lexer_report(lexer, "malformed binary literal", loc);
                return lexer_tok(lexer, TT_Err, loc);
            }
            lexer_skip_while(lexer, "01");
            return lexer_tok(lexer, TT_Binary, loc);
        }
        if (lexer->ch == 'x') {
            lexer_step(lexer);
            if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) {
                lexer_report(lexer, "malformed hex literal", loc);
                return lexer_tok(lexer, TT_Err, loc);
            }
            lexer_skip_while(lexer, hex_chars);
            return lexer_tok(lexer, TT_Hex, loc);
        }
        return lexer_tok(lexer, TT_Int, loc);
    }

    if (first == '\'') {
        lexer_step(lexer);
        lexer_skip_literal_char(lexer);
        if (lexer_done(lexer) || lexer->ch != '\'') {
            lexer_report(lexer, "malformed character literal", loc);
            return lexer_tok(lexer, TT_Err, loc);
        }
        lexer_step(lexer);
        return lexer_tok(lexer, TT_Char, loc);
    }

    if (first == '"') {
        lexer_step(lexer);
        while (!lexer_done(lexer) && lexer->ch != '"') {
            lexer_skip_literal_char(lexer);
        }
        if (lexer_done(lexer) || lexer->ch != '"') {
            lexer_report(lexer, "malformed string literal", loc);
            return lexer_tok(lexer, TT_Err, loc);
        }
        lexer_step(lexer);
        return lexer_tok(lexer, TT_Str, loc);
    }

    if (first == '<') {
        lexer_step(lexer);
        if (!lexer_done(lexer) && lexer->ch == '<') {
            lexer_step(lexer);
            return lexer_tok(lexer, TT_DoubleLt, loc);
        }
        lexer_report(lexer, "expected '<'", loc);
        return lexer_tok(lexer, TT_Err, loc);
    }

    if (first == '>') {
        lexer_step(lexer);
        if (!lexer_done(lexer) && lexer->ch == '>') {
            lexer_step(lexer);
            return lexer_tok(lexer, TT_DoubleGt, loc);
        }
        lexer_report(lexer, "expected '>'", loc);
        return lexer_tok(lexer, TT_Err, loc);
    }

    if (str_includes("|^&+-*/%()[].,:!", first)) {
        // These token kinds are numerically equal to their character.
        lexer_step(lexer);
        return lexer_tok(lexer, (TokTy)first, loc);
    }

    lexer_report(lexer, "illegal character", loc);
    lexer_step(lexer);
    return lexer_tok(lexer, TT_Err, loc);
}

/**
 * Consumes one character of a char/string literal body; a backslash also
 * consumes the character after it. Returns -1 when the input ends right
 * after a backslash, otherwise 0.
 */
static int lexer_skip_literal_char(Lexer* lexer)
{
    char ch = lexer->ch;
    lexer_step(lexer);
    if (ch == '\\') {
        if (lexer_done(lexer)) {
            return -1;
        }
        lexer_step(lexer);
    }
    return 0;
}

/** Advances one character, keeping line/column in step; no-op at end of text. */
static void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer)) {
        return;
    }
    if (lexer->ch == '\n') {
        lexer->line += 1;
        lexer->col = 1;
    } else {
        lexer->col += 1;
    }
    lexer->idx += 1;
    lexer->ch = lexer->text[lexer->idx];
}

/** Marks the lexer as failed and prints `msg` with the source excerpt at `loc`. */
static void lexer_report(Lexer* lexer, const char* msg, Loc loc)
{
    lexer->error_occured = true;
    fprintf(stderr, FMT_ERROR("%s"), msg);
    loc_pretty_print(loc, lexer->text, lexer->text_len);
}

/** Snapshot of the lexer's current position. */
static Loc lexer_loc(const Lexer* lexer)
{
    return (Loc) {
        .filename = lexer->filename,
        .idx = lexer->idx,
        .line = lexer->line,
        .col = lexer->col,
    };
}

/** True once every character of the text has been consumed. */
static bool lexer_done(const Lexer* lexer)
{
    return lexer->idx >= lexer->text_len;
}

/** Builds a token that starts at `loc` and ends at the current position. */
static Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc)
{
    return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx };
}

/** True when `ch` occurs in `str`; the terminating NUL never matches. */
static bool str_includes(const char* str, char ch)
{
    for (size_t i = 0; str[i] != '\0'; ++i) {
        if (str[i] == ch) {
            return true;
        }
    }
    return false;
}

/** Recursively frees an expression tree. Null is accepted and ignored. */
void pexpr_free(PExpr* expr)
{
    if (!expr) {
        return;
    }
    switch (expr->ty) {
    case PExprTy_Err:
    case PExprTy_Imm:
        break;
    case PExprTy_Ident:
    case PExprTy_SubLabel:
    case PExprTy_Str:
        free(expr->str);
        break;
    case PExprTy_Mem:
    case PExprTy_Not:
    case PExprTy_Negate:
        pexpr_free(expr->operand);
        break;
    case PExprTy_Or:
    case PExprTy_Xor:
    case PExprTy_And:
    case PExprTy_Shl:
    case PExprTy_Shr:
    case PExprTy_Add:
    case PExprTy_Sub:
    case PExprTy_Mul:
    case PExprTy_Div:
    case PExprTy_Mod:
        pexpr_free(expr->left);
        pexpr_free(expr->right);
        break;
    }
    free(expr);
}

/** Frees a PConst together with its identifier and value. Null is ignored. */
void pconst_free(PConst* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    pexpr_free(stmt->value);
    free(stmt);
}

/** Frees a PInclude together with its filename. Null is ignored. */
void pinclude_free(PInclude* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->filename);
    free(stmt);
}

/** Frees a PLabel together with its identifier. Null is ignored. */
void plabel_free(PLabel* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    free(stmt);
}

/** Frees a PLine together with its identifier and operands. Null is ignored. */
void pline_free(PLine* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    for (size_t i = 0; i < stmt->ops_size; ++i) {
        pexpr_free(stmt->ops[i]);
    }
    free(stmt);
}