651 lines
18 KiB
C
651 lines
18 KiB
C
#include "parse.h"
|
|
#include "lex.h"
|
|
#include "report.h"
|
|
#include "str.h"
|
|
#include <assert.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/*
 * Allocates a PLabel and fills it from the arguments.
 *
 * `next` and `ident` are stored as given (nothing is copied), so a list is
 * grown by passing the current head as `next`. The malloc() result is not
 * checked before it is written to.
 */
PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc)
{
    PLabel* node = malloc(sizeof *node);
    /* Positional initializer: the value order must match the PLabel field order. */
    *node = (PLabel) { next, ident, loc, sub_label };
    return node;
}
|
|
|
|
/*
 * Releases a whole label list: every node reachable through `next`, together
 * with each node's `ident` string. Passing NULL is a no-op.
 */
void plabel_free(PLabel* label)
{
    while (label != NULL) {
        /* Remember the successor before the current node is released. */
        PLabel* following = label->next;
        free(label->ident);
        free(label);
        label = following;
    }
}
|
|
|
|
/*
 * Allocates a register operand (PoTy_Reg) referring to `reg`, tagged with the
 * source location `loc`. The malloc() result is not checked.
 */
POperand* poperand_new_reg(Reg reg, Loc loc)
{
    const POperand init = { .ty = PoTy_Reg, .loc = loc, .reg = reg };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates an immediate operand (PoTy_Imm) holding the 16-bit value `imm`,
 * tagged with the source location `loc`. The malloc() result is not checked.
 */
POperand* poperand_new_imm(uint16_t imm, Loc loc)
{
    const POperand init = { .ty = PoTy_Imm, .loc = loc, .imm = imm };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates a text-carrying operand of kind `ty`.
 *
 * The `str` pointer is stored as given (not copied) together with `str_len`.
 * The malloc() result is not checked.
 */
POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc loc)
{
    const POperand init = {
        .ty = ty,
        .loc = loc,
        .str = str,
        .str_len = str_len,
    };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates an operand of kind `ty` that wraps the single child `inner`.
 * The child pointer is stored as given. The malloc() result is not checked.
 */
POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc)
{
    const POperand init = { .ty = ty, .loc = loc, .operand = inner };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
POperand* poperand_new_binary(
|
|
POperandTy ty, POperand* left, POperand* right, Loc loc)
|
|
{
|
|
POperand* operand = malloc(sizeof(POperand));
|
|
*operand
|
|
= (POperand) { .ty = ty, .loc = loc, .left = left, .right = right };
|
|
return operand;
|
|
}
|
|
|
|
/*
 * Recursively releases an operand tree.
 *
 * Text operands (Ident, SubLabel, Str) free their string, unary operands free
 * their single child and binary operands free both children; register and
 * immediate operands own nothing besides the node itself.
 *
 * NULL is accepted and ignored. The parser's error paths can hand over a NULL
 * operand (a failed sub-expression), and nodes built around a failed child
 * carry NULL child pointers, so the recursion must tolerate it as well.
 */
void poperand_free(POperand* operand)
{
    if (operand == NULL) {
        return;
    }
    switch (operand->ty) {
        case PoTy_Reg:
        case PoTy_Imm:
            break;
        case PoTy_Ident:
        case PoTy_SubLabel:
        case PoTy_Str:
            free(operand->str);
            break;
        case PoTy_Mem8:
        case PoTy_Mem16:
        case PoTy_Not:
        case PoTy_Negate:
            poperand_free(operand->operand);
            break;
        case PoTy_Or:
        case PoTy_Xor:
        case PoTy_And:
        case PoTy_Shl:
        case PoTy_Shr:
        case PoTy_Add:
        case PoTy_Sub:
        case PoTy_Mul:
        case PoTy_Div:
        case PoTy_Mod:
            poperand_free(operand->left);
            poperand_free(operand->right);
            break;
    }
    free(operand);
}
|
|
|
|
/*
 * Allocates a PLine with room for `ops_size` operand pointers stored inline
 * after the struct, then copies the pointers from `ops` into it.
 *
 * `op`, `labels` and the operand pointers are stored as given (not copied).
 * The malloc() result is not checked.
 */
PLine* pline_new(
    char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops)
{
    const size_t bytes = sizeof(PLine) + ops_size * sizeof(POperand*);
    PLine* result = malloc(bytes);
    *result = (PLine) {
        .op = op,
        .labels = labels,
        .loc = loc,
        .ops_size = ops_size,
    };
    size_t i = 0;
    while (i < ops_size) {
        result->ops[i] = ops[i];
        ++i;
    }
    return result;
}
|
|
|
|
/*
 * Releases a line together with everything it holds: its label list, its
 * mnemonic string `op`, and each of its `ops_size` operands.
 */
void pline_free(PLine* pline)
{
    plabel_free(pline->labels);
    free(pline->op);

    POperand** const end = pline->ops + pline->ops_size;
    for (POperand** it = pline->ops; it != end; ++it) {
        poperand_free(*it);
    }
    free(pline);
}
|
|
|
|
/*
 * Allocates a statement of kind PStmtTy_Line wrapping `line`.
 * The line pointer is stored as given. The malloc() result is not checked.
 */
PStmt* pstmt_new_line(Loc loc, PLine* line)
{
    const PStmt init = { .ty = PStmtTy_Line, .loc = loc, .line = line };
    PStmt* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates a statement of kind `ty` that carries only an identifier; its
 * `value` field is explicitly set to NULL.
 * The `ident` pointer is stored as given. The malloc() result is not checked.
 */
PStmt* pstmt_new_ident(PStmtTy ty, Loc loc, char* ident)
{
    const PStmt init = {
        .ty = ty,
        .loc = loc,
        .ident = ident,
        .value = NULL,
    };
    PStmt* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates a statement of kind `ty` that binds the name `ident` to the
 * operand expression `value`.
 * Both pointers are stored as given. The malloc() result is not checked.
 */
PStmt* pstmt_new_const(PStmtTy ty, Loc loc, char* ident, POperand* value)
{
    const PStmt init = {
        .ty = ty,
        .loc = loc,
        .ident = ident,
        .value = value,
    };
    PStmt* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Allocates a statement of kind PStmtTy_Include carrying the string `str`.
 * The pointer is stored as given. The malloc() result is not checked.
 */
PStmt* pstmt_new_include(Loc loc, char* str)
{
    const PStmt init = { .ty = PStmtTy_Include, .loc = loc, .str = str };
    PStmt* node = malloc(sizeof *node);
    *node = init;
    return node;
}
|
|
|
|
/*
 * Releases a statement and whatever its kind owns: the wrapped line, the
 * identifier (Global/Extern/Const), the constant's value expression, or the
 * include string.
 *
 * NULL is accepted and ignored, matching plabel_free() and free(). Payload
 * pointers that are NULL (for instance a Const statement whose value failed
 * to parse) are skipped rather than passed to a function that dereferences
 * them.
 */
void pstmt_free(PStmt* stmt)
{
    if (stmt == NULL) {
        return;
    }
    switch (stmt->ty) {
        case PStmtTy_Line:
            if (stmt->line != NULL) {
                pline_free(stmt->line);
            }
            break;
        case PStmtTy_Global:
        case PStmtTy_Extern:
            free(stmt->ident);
            break;
        case PStmtTy_Const:
            free(stmt->ident);
            if (stmt->value != NULL) {
                poperand_free(stmt->value);
            }
            break;
        case PStmtTy_Include:
            free(stmt->str);
            break;
    }
    free(stmt);
}
|
|
|
|
/*
 * Initializes `parser` to read `text` (reported under the name `filename`)
 * and loads the first token into `parser->tok`.
 *
 * The lexer is constructed directly inside the parser and the first token is
 * pulled from that same instance. Building the lexer in a local and writing
 * `.lexer = lexer, .tok = lexer_next(&lexer)` in one initializer list is not
 * reliable: the initializer expressions are indeterminately sequenced, so the
 * stored copy might or might not have advanced past the first token.
 */
void parser_construct(Parser* parser, const char* filename, const char* text)
{
    *parser = (Parser) {
        .eaten = (Tok) { 0 },
        .error_occured = false,
    };
    lexer_construct(&parser->lexer, filename, text);
    parser->tok = lexer_next(&parser->lexer);
}
|
|
|
|
bool parser_done(const Parser* parser)
|
|
{
|
|
return parser->tok.ty == TT_Eof;
|
|
}
|
|
|
|
bool parser_error_occured(const Parser* parser)
|
|
{
|
|
return parser->error_occured || parser->lexer.error_occured;
|
|
}
|
|
|
|
/* Replaces the current token with the next one produced by the lexer. */
static inline void parser_step(Parser* parser)
{
    Lexer* lexer = &parser->lexer;
    parser->tok = lexer_next(lexer);
}
|
|
|
|
/* Returns true if the current token has type `ty`; consumes nothing. */
static inline bool parser_test(const Parser* parser, TokTy ty)
{
    const TokTy current = parser->tok.ty;
    return current == ty;
}
|
|
|
|
/*
 * Consumes the current token if it has type `ty`.
 *
 * On a match the token is saved in `parser->eaten`, the parser advances, and
 * true is returned. Otherwise nothing changes and false is returned.
 */
static inline bool parser_eat(Parser* parser, TokTy ty)
{
    if (!parser_test(parser, ty)) {
        return false;
    }
    parser->eaten = parser->tok;
    parser_step(parser);
    return true;
}
|
|
|
|
/*
 * Returns the result of asm_strndup() on the `tok.len` characters of the
 * source text that start at offset `tok.loc.idx`, i.e. the token's spelling.
 * Callers in this file free() the returned string.
 */
static inline char* parser_text_val(const Parser* parser, Tok tok)
{
    return asm_strndup(parser->lexer.text + tok.loc.idx, tok.len);
}
|
|
|
|
/*
 * Decodes the character at the start of `str`.
 *
 * A plain character decodes to itself. A backslash introduces an escape:
 * `\0`, `\t` and `\n` map to NUL, tab and newline; any other escaped
 * character maps to itself. The second character is read only when the first
 * one is a backslash.
 */
static inline char literal_char_val(const char* str)
{
    const char first = str[0];
    if (first != '\\') {
        return first;
    }

    const char code = str[1];
    if (code == '0') {
        return 0;
    }
    if (code == 't') {
        return '\t';
    }
    if (code == 'n') {
        return '\n';
    }
    return code;
}
|
|
|
|
static inline char* parser_str_val(
|
|
const Parser* parser, size_t* str_len, Tok tok)
|
|
{
|
|
char* lit = parser_text_val(parser, tok);
|
|
char* str = calloc(tok.len - 1, sizeof(char));
|
|
*str_len = 0;
|
|
for (size_t i = 1; i < tok.len - 1; ++i) {
|
|
str[*str_len] = literal_char_val(&lit[i]);
|
|
*str_len += 1;
|
|
}
|
|
free(lit);
|
|
return str;
|
|
}
|
|
|
|
/*
 * Marks the parser as failed, emits `msg` through REPORTF_ERROR, and prints
 * the source location `loc` using the lexer's filename and text.
 */
static inline void parser_report(Parser* parser, const char* msg, Loc loc)
{
    parser->error_occured = true;
    REPORTF_ERROR("%s", msg);

    const Lexer* source = &parser->lexer;
    print_report_loc(source->filename, source->text, source->text_len, loc);
}
|
|
|
|
/* Consumes consecutive newline tokens until the current token is something else. */
static inline void parser_skip_newlines(Parser* parser)
{
    bool consumed;
    do {
        consumed = parser_eat(parser, '\n');
    } while (consumed);
}
|
|
|
|
static inline PLabel* parser_parse_labels(
|
|
Parser* parser, char** ident, Loc* ident_loc)
|
|
{
|
|
*ident = NULL;
|
|
PLabel* labels = NULL;
|
|
while (parser->tok.ty != TT_Eof && *ident == NULL) {
|
|
parser_skip_newlines(parser);
|
|
Loc loc = parser->tok.loc;
|
|
if (parser_eat(parser, '.')) {
|
|
if (!parser_eat(parser, TT_Ident)) {
|
|
parser_report(parser, "expected identifier", parser->tok.loc);
|
|
plabel_free(labels);
|
|
return NULL;
|
|
}
|
|
char* label_ident = parser_text_val(parser, parser->eaten);
|
|
if (!parser_eat(parser, ':')) {
|
|
parser_report(parser, "expected ':'", parser->tok.loc);
|
|
plabel_free(labels);
|
|
free(label_ident);
|
|
return NULL;
|
|
}
|
|
labels = plabel_new(labels, label_ident, true, loc);
|
|
} else if (parser_eat(parser, TT_Ident)) {
|
|
*ident = parser_text_val(parser, parser->eaten);
|
|
*ident_loc = loc;
|
|
if (!parser_eat(parser, ':')) {
|
|
break;
|
|
}
|
|
labels = plabel_new(labels, *ident, false, loc);
|
|
*ident = NULL;
|
|
} else {
|
|
parser_report(
|
|
parser, "expected identifier or ':'", parser->tok.loc);
|
|
plabel_free(labels);
|
|
return NULL;
|
|
}
|
|
}
|
|
return labels;
|
|
}
|
|
|
|
// Precedence level passed to parser_parse_operand_2 to parse a complete binary expression; it equals the largest value in that function's op_precs table.
static const int parser_binary_prec = 6;
|
|
// Forward declaration: parser_parse_operand_0 calls this for parenthesised sub-expressions before the definition appears.
static inline POperand* parser_parse_operand_2(Parser* parser, int prec);
|
|
|
|
static inline POperand* parser_parse_operand_0(Parser* parser)
|
|
{
|
|
Loc loc = parser->tok.loc;
|
|
if (parser_eat(parser, TT_Ident)) {
|
|
char* ident = parser_text_val(parser, parser->eaten);
|
|
const char* reg_key[10] = {
|
|
"r0", "r1", "r2", "r3", "r4", "rbp", "rsp", "rfl", "rcs", "rip"
|
|
};
|
|
Reg reg_val[10] = { R0, R1, R2, R3, R4, Rbp, Rsp, Rfl, Rcs, Rip };
|
|
for (size_t i = 0; i < 10; ++i) {
|
|
if (strcmp(reg_key[i], ident) == 0) {
|
|
free(ident);
|
|
return poperand_new_reg(reg_val[i], loc);
|
|
}
|
|
}
|
|
return poperand_new_str(PoTy_Ident, ident, parser->eaten.len, loc);
|
|
} else if (parser_eat(parser, TT_Int)) {
|
|
char* str = parser_text_val(parser, parser->eaten);
|
|
uint64_t val = strtoull(str, NULL, 10);
|
|
free(str);
|
|
if (val > 0xffff) {
|
|
parser_report(parser,
|
|
"integers larger than 65536 not supported",
|
|
parser->tok.loc);
|
|
return NULL;
|
|
}
|
|
uint16_t imm = (uint16_t)val;
|
|
return poperand_new_imm(imm, loc);
|
|
} else if (parser_eat(parser, TT_Binary)) {
|
|
char* str = parser_text_val(parser, parser->eaten);
|
|
uint64_t val = strtoull(&str[2], NULL, 2);
|
|
free(str);
|
|
if (val > 0xffff) {
|
|
parser_report(parser,
|
|
"integers larger than 65536 not supported",
|
|
parser->tok.loc);
|
|
return NULL;
|
|
}
|
|
uint16_t imm = (uint16_t)val;
|
|
return poperand_new_imm(imm, loc);
|
|
} else if (parser_eat(parser, TT_Hex)) {
|
|
char* str = parser_text_val(parser, parser->eaten);
|
|
uint64_t val = strtoull(&str[2], NULL, 16);
|
|
free(str);
|
|
if (val > 0xffff) {
|
|
parser_report(parser,
|
|
"integers larger than 65536 not supported",
|
|
parser->tok.loc);
|
|
return NULL;
|
|
}
|
|
uint16_t imm = (uint16_t)val;
|
|
return poperand_new_imm(imm, loc);
|
|
} else if (parser_eat(parser, TT_Char)) {
|
|
char* str = parser_text_val(parser, parser->eaten);
|
|
uint16_t imm = (uint16_t)literal_char_val(&str[1]);
|
|
free(str);
|
|
return poperand_new_imm(imm, loc);
|
|
} else if (parser_eat(parser, TT_Str)) {
|
|
size_t str_len;
|
|
char* str = parser_str_val(parser, &str_len, parser->eaten);
|
|
return poperand_new_str(PoTy_Str, str, str_len, loc);
|
|
} else if (parser_eat(parser, '.')) {
|
|
if (!parser_eat(parser, TT_Ident)) {
|
|
parser_report(parser, "expected identifier", parser->tok.loc);
|
|
return NULL;
|
|
}
|
|
char* ident = parser_text_val(parser, parser->eaten);
|
|
return poperand_new_str(PoTy_SubLabel, ident, parser->eaten.len, loc);
|
|
} else if (parser_eat(parser, '(')) {
|
|
POperand* operand = parser_parse_operand_2(parser, parser_binary_prec);
|
|
if (!parser_eat(parser, ')')) {
|
|
parser_report(parser, "expected ')'", parser->tok.loc);
|
|
poperand_free(operand);
|
|
return NULL;
|
|
}
|
|
return operand;
|
|
} else {
|
|
parser_report(parser, "expected operand", parser->tok.loc);
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
/*
 * Parses a unary expression: any number of prefix `-` (negate) or `!` (not)
 * operators applied to a primary operand.
 *
 * Returns NULL if the operand being wrapped failed to parse, rather than
 * building a unary node around a NULL child that later code would
 * dereference.
 */
static inline POperand* parser_parse_operand_1(Parser* parser)
{
    Loc loc = parser->tok.loc;

    POperandTy ty;
    if (parser_eat(parser, '-')) {
        ty = PoTy_Negate;
    } else if (parser_eat(parser, '!')) {
        ty = PoTy_Not;
    } else {
        return parser_parse_operand_0(parser);
    }

    POperand* inner = parser_parse_operand_1(parser);
    if (inner == NULL) {
        return NULL;
    }
    return poperand_new_unary(ty, inner, loc);
}
|
|
|
|
/*
 * Parses a left-associative binary expression by precedence climbing.
 *
 * `prec` is the loosest precedence level this call may consume: level 0
 * delegates to the unary parser, and each higher level first parses its left
 * side one level tighter and then folds in operators whose level is at most
 * `prec`. Levels, from tightest to loosest:
 *   1: * / %    2: + -    3: >> <<    4: &    5: ^    6: |
 *
 * Returns NULL when either side of an operator fails to parse (the failure
 * has already been reported); a left-hand side parsed so far is freed.
 */
static inline POperand* parser_parse_operand_2(Parser* parser, int prec)
{
    const struct {
        TokTy tt;
        POperandTy ty;
        int prec;
    } ops[] = {
        { '|', PoTy_Or, 6 },
        { '^', PoTy_Xor, 5 },
        { '&', PoTy_And, 4 },
        { TT_DoubleGt, PoTy_Shr, 3 },
        { TT_DoubleLt, PoTy_Shl, 3 },
        { '+', PoTy_Add, 2 },
        { '-', PoTy_Sub, 2 },
        { '*', PoTy_Mul, 1 },
        { '/', PoTy_Div, 1 },
        { '%', PoTy_Mod, 1 },
    };
    const size_t ops_count = sizeof(ops) / sizeof(ops[0]);

    if (prec == 0) {
        return parser_parse_operand_1(parser);
    }

    POperand* left = parser_parse_operand_2(parser, prec - 1);
    if (left == NULL) {
        /* Nothing to combine; avoids reading left->loc below. */
        return NULL;
    }

    bool should_continue = true;
    while (should_continue) {
        should_continue = false;
        for (size_t i = 0; i < ops_count; ++i) {
            if (prec >= ops[i].prec && parser_eat(parser, ops[i].tt)) {
                POperand* right = parser_parse_operand_2(parser, prec - 1);
                if (right == NULL) {
                    poperand_free(left);
                    return NULL;
                }
                left = poperand_new_binary(ops[i].ty, left, right, left->loc);
                should_continue = true;
                break;
            }
        }
    }
    return left;
}
|
|
|
|
/*
 * Parses a complete operand: either a memory access `u8[expr]` / `u16[expr]`
 * or a plain expression.
 *
 * A bracket without a preceding size keyword is reported as an error. When
 * the address expression fails to parse, NULL is returned (the error has
 * already been reported) instead of wrapping or freeing a NULL operand.
 *
 * NOTE(review): the missing-size check tests TT_LBracket while the opening
 * bracket after the keyword is matched as '[' - confirm both denote the same
 * token type.
 */
static inline POperand* parser_parse_operand_3(Parser* parser)
{
    Loc loc = parser->tok.loc;
    if (parser_eat(parser, TT_LBracket)) {
        parser_report(parser, "expected 'u8' or 'u16' before '['", loc);
        return NULL;
    }
    if (!parser_test(parser, TT_Ident)) {
        return parser_parse_operand_2(parser, parser_binary_prec);
    }

    /* Peek at the identifier's text without consuming it. */
    char* ident = parser_text_val(parser, parser->tok);
    POperandTy mem_ty = PoTy_Mem8;
    bool is_mem = true;
    if (strcmp(ident, "u8") == 0) {
        mem_ty = PoTy_Mem8;
    } else if (strcmp(ident, "u16") == 0) {
        mem_ty = PoTy_Mem16;
    } else {
        is_mem = false;
    }
    free(ident);

    if (!is_mem) {
        return parser_parse_operand_2(parser, parser_binary_prec);
    }

    /* Consume the size keyword, then require `[ expr ]`. */
    parser_step(parser);
    if (!parser_eat(parser, '[')) {
        parser_report(parser, "expected '['", parser->tok.loc);
        return NULL;
    }
    POperand* address = parser_parse_operand_2(parser, parser_binary_prec);
    if (address == NULL) {
        return NULL;
    }
    if (!parser_eat(parser, ']')) {
        parser_report(parser, "expected ']'", parser->tok.loc);
        poperand_free(address);
        return NULL;
    }
    return poperand_new_unary(mem_ty, address, loc);
}
|
|
|
|
/*
 * Error recovery helper: discards tokens up to and including the next
 * newline token, or stops at end of file.
 */
static inline void parser_skip_to_next_line(Parser* parser)
{
    for (;;) {
        if (parser_done(parser)) {
            return;
        }
        if (parser_eat(parser, TT_Newline)) {
            return;
        }
        parser_step(parser);
    }
}
|
|
|
|
/*
 * Parses one assembly line: optional labels, an optional mnemonic, and a
 * comma-separated operand list terminated by a newline or end of file.
 * Trailing blank lines are consumed as well.
 *
 * Returns a newly allocated PLine that owns the labels, mnemonic and
 * operands, or NULL after a reported error. On every error path the rest of
 * the offending line is skipped so the next call starts at a line boundary,
 * and everything parsed so far is freed.
 *
 * NOTE(review): the operand loop stops after 3 operands, which makes the
 * 64-operand capacity check unreachable; a fourth operand is diagnosed as
 * "expected newline". Confirm which limit is intended.
 */
PLine* parser_next_line(Parser* parser)
{
    enum { max_ops_size = 64, line_ops_limit = 3 };

    char* ident = NULL;
    /* Default location for lines without a mnemonic (labels only). */
    Loc loc = parser->tok.loc;
    PLabel* labels = parser_parse_labels(parser, &ident, &loc);

    POperand* ops[max_ops_size];
    size_t ops_size = 0;

    if (ident == NULL && !parser_test(parser, TT_Eof)) {
        /*
         * Label parsing only stops without a mnemonic at end of file or on a
         * syntax error; this is the error case (already reported).
         */
        parser_skip_to_next_line(parser);
        goto error_free_ops;
    }

    if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
        POperand* first = parser_parse_operand_3(parser);
        if (!first) {
            parser_skip_to_next_line(parser);
            goto error_free_ops;
        }
        ops[ops_size++] = first;
        while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')
            && ops_size < (size_t)line_ops_limit) {
            if (ops_size >= (size_t)max_ops_size) {
                parser_report(parser,
                    "exceeded maximum number of operands (64)",
                    parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            if (!parser_eat(parser, ',')) {
                parser_report(parser, "expected ','", parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            POperand* next = parser_parse_operand_3(parser);
            if (!next) {
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            ops[ops_size++] = next;
        }
    }
    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
        parser_report(parser, "expected newline", parser->tok.loc);
        /* Resynchronise like the other error paths do. */
        parser_skip_to_next_line(parser);
        goto error_free_ops;
    }
    parser_skip_newlines(parser);

    return pline_new(ident, labels, loc, ops_size, ops);

error_free_ops:
    for (size_t i = 0; i < ops_size; ++i) {
        if (ops[i]) {
            poperand_free(ops[i]);
        }
    }
    plabel_free(labels);
    free(ident);
    return NULL;
}
|
|
|
|
static inline PStmtTy pstmt_keyword_ty(const char* ident)
|
|
{
|
|
const PStmtTy tys[] = {
|
|
PStmtTy_Global,
|
|
PStmtTy_Extern,
|
|
PStmtTy_Const,
|
|
PStmtTy_Include,
|
|
};
|
|
const char* keywords[] = {
|
|
"global",
|
|
"extern",
|
|
"const",
|
|
"include",
|
|
};
|
|
static_assert(
|
|
sizeof(keywords) / sizeof(keywords[0]) == sizeof(tys) / sizeof(tys[0]),
|
|
"mismatch");
|
|
|
|
size_t amount = sizeof(tys) / sizeof(tys[0]);
|
|
for (size_t i = 0; i < amount; ++i) {
|
|
if (strcmp(ident, keywords[i]) == 0) {
|
|
return tys[i];
|
|
}
|
|
}
|
|
return PStmtTy_Line;
|
|
}
|
|
|
|
/*
 * Parses the next top-level statement.
 *
 * Leading newlines are skipped. If the first token is one of the keywords
 * `global`, `extern`, `const` or `include`, the matching directive is
 * parsed:
 *   global NAME / extern NAME   -> identifier statement
 *   const NAME OPERAND          -> constant definition
 *   include "STRING"            -> include statement
 * Anything else is parsed as an ordinary assembly line.
 *
 * Returns a newly allocated statement, or NULL after a reported error. A
 * failed directive skips the rest of its line so the next call starts at a
 * line boundary, and a `const` whose value fails to parse yields NULL rather
 * than a statement holding a NULL value.
 */
PStmt* parser_next_stmt(Parser* parser)
{
    parser_skip_newlines(parser);
    Loc loc = parser->tok.loc;

    /* Classify by peeking at a leading identifier; nothing is consumed yet. */
    PStmtTy ty = PStmtTy_Line;
    if (parser_test(parser, TT_Ident)) {
        char* keyword = parser_text_val(parser, parser->tok);
        ty = pstmt_keyword_ty(keyword);
        free(keyword);
    }

    switch (ty) {
        case PStmtTy_Global:
        case PStmtTy_Extern:
        case PStmtTy_Const: {
            parser_step(parser);
            if (!parser_eat(parser, TT_Ident)) {
                parser_report(parser, "expected identifier", parser->tok.loc);
                parser_skip_to_next_line(parser);
                return NULL;
            }
            char* ident = parser_text_val(parser, parser->eaten);
            if (ty != PStmtTy_Const) {
                return pstmt_new_ident(ty, loc, ident);
            }
            POperand* value = parser_parse_operand_3(parser);
            if (value == NULL) {
                /* The operand parser has already reported the error. */
                free(ident);
                parser_skip_to_next_line(parser);
                return NULL;
            }
            return pstmt_new_const(ty, loc, ident, value);
        }
        case PStmtTy_Include: {
            parser_step(parser);
            if (!parser_eat(parser, TT_Str)) {
                parser_report(parser, "expected string", parser->tok.loc);
                parser_skip_to_next_line(parser);
                return NULL;
            }
            size_t str_len;
            char* str = parser_str_val(parser, &str_len, parser->eaten);
            return pstmt_new_include(loc, str);
        }
        default:
            break;
    }

    PLine* line = parser_next_line(parser);
    if (!line) {
        return NULL;
    }
    return pstmt_new_line(loc, line);
}
|