#include "parse.h"
#include "lex.h"
#include "report.h"
#include "str.h"
// Standard headers required by this file (<assert.h> provides static_assert).
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

// Allocates a label node holding `next`, `ident`, `loc` and `sub_label`.
// `ident` is released by plabel_free(), so the caller hands it over.
PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc)
{
    PLabel* label = malloc(sizeof(PLabel));
    *label = (PLabel) { next, ident, loc, sub_label };
    return label;
}

// Frees a whole label list (every node reachable through `next`) together
// with the identifiers. Accepts NULL.
void plabel_free(PLabel* label)
{
    // Iterative so that a long label list cannot exhaust the call stack.
    while (label) {
        PLabel* next = label->next;
        free(label->ident);
        free(label);
        label = next;
    }
}

// Allocates a register operand.
POperand* poperand_new_reg(Reg reg, Loc loc)
{
    POperand* operand = malloc(sizeof(POperand));
    *operand = (POperand) { .ty = PoTy_Reg, .loc = loc, .reg = reg };
    return operand;
}

// Allocates a 16-bit immediate operand.
POperand* poperand_new_imm(uint16_t imm, Loc loc)
{
    POperand* operand = malloc(sizeof(POperand));
    *operand = (POperand) { .ty = PoTy_Imm, .loc = loc, .imm = imm };
    return operand;
}

// Allocates a string-carrying operand (identifier, sub-label or string
// literal, selected by `ty`). `str` is released by poperand_free().
POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc loc)
{
    POperand* operand = malloc(sizeof(POperand));
    *operand = (POperand) {
        .ty = ty,
        .loc = loc,
        .str = str,
        .str_len = str_len,
    };
    return operand;
}

// Allocates an operand wrapping a single inner operand (memory access, not,
// negate). `inner` is released by poperand_free().
POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc)
{
    POperand* operand = malloc(sizeof(POperand));
    *operand = (POperand) { .ty = ty, .loc = loc, .operand = inner };
    return operand;
}

// Allocates a binary-operator operand. Both children are released by
// poperand_free().
POperand* poperand_new_binary(
    POperandTy ty, POperand* left, POperand* right, Loc loc)
{
    POperand* operand = malloc(sizeof(POperand));
    *operand
        = (POperand) { .ty = ty, .loc = loc, .left = left, .right = right };
    return operand;
}

// Recursively frees an operand tree. Accepts NULL so that error paths can
// release a possibly-failed sub-parse without checking first.
void poperand_free(POperand* operand)
{
    if (!operand) {
        return;
    }
    switch (operand->ty) {
        case PoTy_Reg:
        case PoTy_Imm:
            break;
        case PoTy_Ident:
        case PoTy_SubLabel:
        case PoTy_Str:
            free(operand->str);
            break;
        case PoTy_Mem8:
        case PoTy_Mem16:
        case PoTy_Not:
        case PoTy_Negate:
            poperand_free(operand->operand);
            break;
        case PoTy_Or:
        case PoTy_Xor:
        case PoTy_And:
        case PoTy_Shl:
        case PoTy_Shr:
        case PoTy_Add:
        case PoTy_Sub:
        case PoTy_Mul:
        case PoTy_Div:
        case PoTy_Mod:
            poperand_free(operand->left);
            poperand_free(operand->right);
            break;
    }
    free(operand);
}

// Allocates a line with room for `ops_size` operand pointers and copies the
// pointers out of `ops`. The array `ops` itself is not retained; `op`,
// `labels` and the operands are released by pline_free().
PLine* pline_new(
    char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops)
{
    PLine* line = malloc(sizeof(PLine) + sizeof(POperand*) * ops_size);
    *line = (PLine) {
        .labels = labels,
        .op = op,
        .loc = loc,
        .ops_size = ops_size,
    };
    for (size_t i = 0; i < ops_size; ++i) {
        line->ops[i] = ops[i];
    }
    return line;
}

// Frees a line together with its labels, mnemonic and operands.
// Accepts NULL.
void pline_free(PLine* pline)
{
    if (!pline) {
        return;
    }
    plabel_free(pline->labels);
    free(pline->op);
    for (size_t i = 0; i < pline->ops_size; ++i) {
        poperand_free(pline->ops[i]);
    }
    free(pline);
}

// Frees a statement and whatever payload its kind carries. Accepts NULL.
void pstmt_free(PStmt* stmt)
{
    if (!stmt) {
        return;
    }
    switch (stmt->ty) {
        case PStmtTy_Line:
            pline_free(stmt->line);
            break;
        case PStmtTy_Global:
        case PStmtTy_Extern:
        case PStmtTy_Define:
            free(stmt->ident);
            break;
    }
    free(stmt);
}

// Initializes `parser` over `text` and primes it with the first token.
void parser_construct(Parser* parser, const char* filename, const char* text)
{
    Lexer lexer;
    lexer_construct(&lexer, filename, text);
    // Read the first token BEFORE copying the lexer into the parser. The
    // expressions of an initializer list are indeterminately sequenced, so
    // calling lexer_next() inside the list could store a lexer that has not
    // yet advanced past the first token.
    Tok first = lexer_next(&lexer);
    *parser = (Parser) {
        .lexer = lexer,
        .tok = first,
        .eaten = (Tok) { 0 },
        .error_occured = false,
    };
}

// True once the current token is end-of-file.
bool parser_done(const Parser* parser)
{
    return parser->tok.ty == TT_Eof;
}

// True if either the parser or its lexer has flagged an error.
bool parser_error_occured(const Parser* parser)
{
    return parser->error_occured || parser->lexer.error_occured;
}

// Advances to the next token without recording the current one.
static inline void parser_step(Parser* parser)
{
    parser->tok = lexer_next(&parser->lexer);
}

// True if the current token has type `ty`.
static inline bool parser_test(const Parser* parser, TokTy ty)
{
    return parser->tok.ty == ty;
}

// Consumes the current token if it has type `ty`, remembering it in
// `parser->eaten`. Returns whether a token was consumed.
static inline bool parser_eat(Parser* parser, TokTy ty)
{
    if (!parser_test(parser, ty)) {
        return false;
    }
    parser->eaten = parser->tok;
    parser_step(parser);
    return true;
}

// Returns a newly allocated copy of the source text covered by `tok`.
static inline char* parser_ident_val(const Parser* parser, Tok tok)
{
    return asm_strndup(&parser->lexer.text[tok.loc.idx], tok.len);
}

// Flags a parse error and prints `msg` with the source location `loc`.
static inline void parser_report(Parser* parser, const char* msg, Loc loc)
{
    parser->error_occured = true;
    REPORTF_ERROR("%s", msg);
    print_report_loc(parser->lexer.filename, parser->lexer.text,
        parser->lexer.text_len, loc);
}

// Consumes any run of newline tokens.
static inline void parser_skip_newlines(Parser* parser)
{
    while (parser_eat(parser, '\n')) { }
}

// Parses the labels in front of an instruction, skipping blank lines.
//
// On success returns true and stores the label list in `*labels` (NULL when
// there are none). If an identifier that is NOT followed by ':' is found it
// is the instruction mnemonic: it is returned in `*ident`; otherwise
// `*ident` is NULL (end of file reached). `*ident_loc` receives the location
// of the last identifier seen.
//
// On a syntax error the error is reported, everything parsed so far is
// freed, `*labels` and `*ident` are NULL and false is returned.
static inline bool parser_parse_labels(
    Parser* parser, PLabel** labels, char** ident, Loc* ident_loc)
{
    *labels = NULL;
    *ident = NULL;
    PLabel* list = NULL;

    while (*ident == NULL) {
        parser_skip_newlines(parser);
        // Trailing blank lines (also after a final label) are not an error.
        if (parser_test(parser, TT_Eof)) {
            break;
        }
        Loc loc = parser->tok.loc;
        if (parser_eat(parser, '.')) {
            if (!parser_eat(parser, TT_Ident)) {
                parser_report(
                    parser, "expected identifier", parser->tok.loc);
                goto error;
            }
            char* label_ident = parser_ident_val(parser, parser->eaten);
            if (!parser_eat(parser, ':')) {
                parser_report(parser, "expected ':'", parser->tok.loc);
                free(label_ident);
                goto error;
            }
            list = plabel_new(list, label_ident, true, loc);
        } else if (parser_eat(parser, TT_Ident)) {
            char* name = parser_ident_val(parser, parser->eaten);
            *ident_loc = loc;
            if (parser_eat(parser, ':')) {
                list = plabel_new(list, name, false, loc);
            } else {
                // Not a label: this is the mnemonic, which ends the loop.
                *ident = name;
            }
        } else {
            parser_report(
                parser, "expected identifier or ':'", parser->tok.loc);
            goto error;
        }
    }

    *labels = list;
    return true;

error:
    plabel_free(list);
    return false;
}

// Decodes the character starting at `str`: either a plain character or a
// two-character backslash escape (\0, \t, \n; any other escaped character
// stands for itself).
static inline char literal_char_val(const char* str)
{
    if (str[0] != '\\') {
        return str[0];
    }
    switch (str[1]) {
        case '0':
            return 0;
        case 't':
            return '\t';
        case 'n':
            return '\n';
        default:
            return str[1];
    }
}

static const int parser_binary_prec = 6;

static inline POperand* parser_parse_operand_2(Parser* parser, int prec);

// Converts the just-eaten integer token to an immediate operand.
// `prefix_len` is the number of leading characters to skip ("0x"/"0b"),
// `base` the radix, `loc` the location of the literal. Reports an error and
// returns NULL if the value does not fit in 16 bits.
static inline POperand* parser_parse_int(
    Parser* parser, size_t prefix_len, int base, Loc loc)
{
    char* text = parser_ident_val(parser, parser->eaten);
    uint64_t val = strtoull(&text[prefix_len], NULL, base);
    free(text);
    if (val > 0xffff) {
        parser_report(
            parser, "integers larger than 65535 not supported", loc);
        return NULL;
    }
    return poperand_new_imm((uint16_t)val, loc);
}

// Parses a primary operand: register, identifier, integer/char/string
// literal, sub-label reference or parenthesized expression.
// Returns NULL after reporting an error.
static inline POperand* parser_parse_operand_0(Parser* parser)
{
    static const struct {
        const char* name;
        Reg reg;
    } regs[] = {
        { "r0", R0 },
        { "r1", R1 },
        { "r2", R2 },
        { "r3", R3 },
        { "r4", R4 },
        { "rbp", Rbp },
        { "rsp", Rsp },
        { "rfl", Rfl },
        { "rcs", Rcs },
        { "rip", Rip },
    };

    Loc loc = parser->tok.loc;

    if (parser_eat(parser, TT_Ident)) {
        char* ident = parser_ident_val(parser, parser->eaten);
        for (size_t i = 0; i < sizeof(regs) / sizeof(regs[0]); ++i) {
            if (strcmp(regs[i].name, ident) == 0) {
                free(ident);
                return poperand_new_reg(regs[i].reg, loc);
            }
        }
        return poperand_new_str(PoTy_Ident, ident, parser->eaten.len, loc);
    }
    if (parser_eat(parser, TT_Int)) {
        return parser_parse_int(parser, 0, 10, loc);
    }
    if (parser_eat(parser, TT_Binary)) {
        return parser_parse_int(parser, 2, 2, loc);
    }
    if (parser_eat(parser, TT_Hex)) {
        return parser_parse_int(parser, 2, 16, loc);
    }
    if (parser_eat(parser, TT_Char)) {
        char* lit = parser_ident_val(parser, parser->eaten);
        // Go through unsigned char so that bytes >= 0x80 are not
        // sign-extended into the upper byte of the immediate.
        uint16_t imm = (uint16_t)(unsigned char)literal_char_val(&lit[1]);
        free(lit);
        return poperand_new_imm(imm, loc);
    }
    if (parser_eat(parser, TT_Str)) {
        char* lit = parser_ident_val(parser, parser->eaten);
        size_t lit_len = strlen(lit);
        // The content lies between the quotes at index 0 and lit_len - 1.
        size_t content_end = lit_len > 0 ? lit_len - 1 : 0;
        // Decoding never grows the text, so this always leaves room for a
        // terminating NUL (calloc zero-fills).
        char* str = calloc(lit_len + 1, sizeof(char));
        size_t str_len = 0;
        for (size_t i = 1; i < content_end; ++i) {
            str[str_len++] = literal_char_val(&lit[i]);
            if (lit[i] == '\\') {
                ++i; // an escape occupies two source characters
            }
        }
        free(lit);
        return poperand_new_str(PoTy_Str, str, str_len, loc);
    }
    if (parser_eat(parser, '.')) {
        if (!parser_eat(parser, TT_Ident)) {
            parser_report(parser, "expected identifier", parser->tok.loc);
            return NULL;
        }
        char* ident = parser_ident_val(parser, parser->eaten);
        return poperand_new_str(
            PoTy_SubLabel, ident, parser->eaten.len, loc);
    }
    if (parser_eat(parser, '(')) {
        POperand* inner = parser_parse_operand_2(parser, parser_binary_prec);
        if (!inner) {
            return NULL; // already reported
        }
        if (!parser_eat(parser, ')')) {
            parser_report(parser, "expected ')'", parser->tok.loc);
            poperand_free(inner);
            return NULL;
        }
        return inner;
    }

    parser_report(parser, "expected operand", parser->tok.loc);
    return NULL;
}

// Parses prefix operators ('-' and '!', right-associative) followed by a
// primary operand. Returns NULL if the inner operand failed to parse.
static inline POperand* parser_parse_operand_1(Parser* parser)
{
    Loc loc = parser->tok.loc;
    POperandTy ty;
    if (parser_eat(parser, '-')) {
        ty = PoTy_Negate;
    } else if (parser_eat(parser, '!')) {
        ty = PoTy_Not;
    } else {
        return parser_parse_operand_0(parser);
    }
    POperand* inner = parser_parse_operand_1(parser);
    if (!inner) {
        return NULL; // never wrap a failed parse in a unary node
    }
    return poperand_new_unary(ty, inner, loc);
}

// Parses a left-associative binary expression by precedence climbing.
// `prec` is the loosest precedence level accepted (parser_binary_prec for a
// full expression, 0 for a unary operand). Returns NULL if any sub-operand
// failed to parse; partial results are freed.
static inline POperand* parser_parse_operand_2(Parser* parser, int prec)
{
    static const POperandTy op_tys[] = {
        PoTy_Or,
        PoTy_Xor,
        PoTy_And,
        PoTy_Shr,
        PoTy_Shl,
        PoTy_Add,
        PoTy_Sub,
        PoTy_Mul,
        PoTy_Div,
        PoTy_Mod,
    };
    static const TokTy op_tts[] = {
        '|',
        '^',
        '&',
        TT_DoubleGt,
        TT_DoubleLt,
        '+',
        '-',
        '*',
        '/',
        '%',
    };
    static const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 };
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_tts) / sizeof(op_tts[0]),
        "misaligned");
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_precs) / sizeof(op_precs[0]),
        "misaligned");
    const size_t op_count = sizeof(op_tys) / sizeof(op_tys[0]);

    if (prec == 0) {
        return parser_parse_operand_1(parser);
    }

    POperand* left = parser_parse_operand_2(parser, prec - 1);
    if (!left) {
        return NULL;
    }

    bool matched = true;
    while (matched) {
        matched = false;
        for (size_t i = 0; i < op_count; ++i) {
            if (prec < op_precs[i] || !parser_eat(parser, op_tts[i])) {
                continue;
            }
            POperand* right = parser_parse_operand_2(parser, prec - 1);
            if (!right) {
                poperand_free(left);
                return NULL;
            }
            left = poperand_new_binary(op_tys[i], left, right, left->loc);
            matched = true;
            break;
        }
    }
    return left;
}

// Parses the "[expr]" part of a memory operand; the size keyword is the
// current token and is consumed here. `ty` is PoTy_Mem8 or PoTy_Mem16 and
// `loc` the location of the size keyword.
static inline POperand* parser_parse_mem(
    Parser* parser, POperandTy ty, Loc loc)
{
    parser_step(parser); // skip 'u8' / 'u16'
    if (!parser_eat(parser, '[')) {
        parser_report(parser, "expected '['", parser->tok.loc);
        return NULL;
    }
    POperand* address = parser_parse_operand_2(parser, parser_binary_prec);
    if (!address) {
        return NULL; // already reported
    }
    if (!parser_eat(parser, ']')) {
        parser_report(parser, "expected ']'", parser->tok.loc);
        poperand_free(address);
        return NULL;
    }
    return poperand_new_unary(ty, address, loc);
}

// Parses a complete operand: a memory access `u8[expr]` / `u16[expr]` or a
// plain expression. Returns NULL after reporting an error.
static inline POperand* parser_parse_operand_3(Parser* parser)
{
    Loc loc = parser->tok.loc;
    if (parser_eat(parser, TT_LBracket)) {
        parser_report(parser, "expected 'u8' or 'u16' before '['", loc);
        return NULL;
    }
    if (!parser_test(parser, TT_Ident)) {
        return parser_parse_operand_2(parser, parser_binary_prec);
    }

    // Peek at the identifier without consuming it: anything other than a
    // size keyword must be re-read by the expression parser.
    char* ident = parser_ident_val(parser, parser->tok);
    bool is_mem8 = strcmp(ident, "u8") == 0;
    bool is_mem16 = strcmp(ident, "u16") == 0;
    free(ident);

    if (is_mem8) {
        return parser_parse_mem(parser, PoTy_Mem8, loc);
    }
    if (is_mem16) {
        return parser_parse_mem(parser, PoTy_Mem16, loc);
    }
    return parser_parse_operand_2(parser, parser_binary_prec);
}

// Error recovery: discards tokens up to and including the next newline (or
// up to end of file).
static inline void parser_skip_to_next_line(Parser* parser)
{
    while (!parser_done(parser) && !parser_eat(parser, TT_Newline)) {
        parser_step(parser);
    }
}

// Parses the next line: optional labels, a mnemonic and up to three
// comma-separated operands, terminated by a newline or end of file.
//
// Returns a new PLine (free with pline_free()). Its `op` is NULL when only
// labels, or nothing at all, preceded the end of file. Returns NULL after a
// syntax error; the error has been reported and the parser has been advanced
// to the following line so parsing can continue.
PLine* parser_next(Parser* parser)
{
    enum {
        // Size of the temporary operand buffer.
        ops_capacity = 64,
        // Operands accepted per line. While this is below ops_capacity the
        // capacity check in the loop below cannot trigger.
        ops_limit = 3,
    };

    POperand* ops[ops_capacity];
    size_t ops_size = 0;
    PLabel* labels = NULL;
    char* ident = NULL;
    // Defined even when no identifier is seen at all.
    Loc loc = parser->tok.loc;

    if (!parser_parse_labels(parser, &labels, &ident, &loc)) {
        parser_skip_to_next_line(parser);
        goto error_free_ops;
    }

    if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
        POperand* first = parser_parse_operand_3(parser);
        if (!first) {
            parser_skip_to_next_line(parser);
            goto error_free_ops;
        }
        ops[ops_size++] = first;

        while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')
            && ops_size < ops_limit) {
            if (ops_size >= ops_capacity) {
                parser_report(parser,
                    "exceeded maximum number of operands (64)",
                    parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            if (!parser_eat(parser, ',')) {
                parser_report(parser, "expected ','", parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            POperand* operand = parser_parse_operand_3(parser);
            if (!operand) {
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            ops[ops_size++] = operand;
        }
    }

    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
        parser_report(parser, "expected newline", parser->tok.loc);
        // Resynchronize so the rest of this line is not parsed as a new one.
        parser_skip_to_next_line(parser);
        goto error_free_ops;
    }
    parser_skip_newlines(parser);

    return pline_new(ident, labels, loc, ops_size, ops);

error_free_ops:
    for (size_t i = 0; i < ops_size; ++i) {
        poperand_free(ops[i]);
    }
    plabel_free(labels);
    free(ident);
    return NULL;
}