vc4/asm/parse.c
2025-10-02 22:44:00 +02:00

843 lines
23 KiB
C

#include "parse.h"
#include "report.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Token kinds produced by the lexer.
//
// Single-character punctuation tokens use the character's own code as their
// enumerator value. The lexer relies on this when it casts a punctuation
// character directly to TokTy, and the parser relies on it when it passes
// character literals such as ',' where a TokTy is expected.
typedef enum {
TT_Err, // returned after the lexer has reported malformed or illegal input
TT_Eof, // end of the source text
TT_Ident,
TT_Int, // decimal integer literal
TT_Binary, // "0b..." literal
TT_Hex, // "0x..." literal
TT_Char, // '...' character literal
TT_Str, // "..." string literal
TT_Newline = '\n',
// No explicit values: these two continue counting from '\n'.
TT_DoubleLt, // "<<"
TT_DoubleGt, // ">>"
TT_Pipe = '|',
TT_Hat = '^',
TT_Ampersand = '&',
TT_Plus = '+',
TT_Minus = '-',
TT_Asterisk = '*',
TT_Slash = '/',
TT_Percent = '%',
TT_LParen = '(',
TT_RParen = ')',
TT_LBracket = '[',
TT_RBracket = ']',
TT_Dot = '.',
TT_Comma = ',',
TT_Colon = ':',
TT_Exclamation = '!',
} TokTy;
// A single token. The token's text is not copied; it is the `len` bytes of
// the lexer's source text starting at `loc.idx`.
typedef struct {
TokTy ty;
Loc loc; // location of the token's first character
size_t len; // number of source bytes the token spans
} Tok;
// Lexer state: a cursor over a NUL-terminated source text.
typedef struct {
const char* filename; // copied into every Loc the lexer produces
const char* text; // source text (borrowed, not copied)
size_t text_len; // strlen(text)
size_t idx; // byte offset of the current character
int line; // line of the current character, starting at 1
int col; // column of the current character, starting at 1
char ch; // text[idx]
bool error_occured; // set once lexer_report() has been called
} Lexer;
// Forward declarations of the file-local lexer routines and the small
// character-set helper they share.
static void lexer_init(Lexer* lexer, const char* filename, const char* text);
static Tok lexer_next(Lexer* lexer);
static void lexer_report(Lexer* lexer, const char* msg, Loc loc);
static int lexer_skip_literal_char(Lexer* lexer);
static Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc);
static Loc lexer_loc(const Lexer* lexer);
static void lexer_step(Lexer* lexer);
static bool lexer_done(const Lexer* lexer);
static bool str_includes(const char* str, char ch);
// Parser state. The parser keeps one token of lookahead.
struct Parser {
Lexer lexer;
Tok tok; // current (not yet consumed) token
Tok eaten; // token most recently consumed by parser_eat()
bool error_occured; // set once parser_report() has been called
// Set by parser_parse_label() when it consumed an identifier that was not
// followed by ':'; while set, parser_parse_label() returns nullptr
// immediately.
bool label_fail;
// Identifier most recently consumed by parser_parse_label();
// parser_parse_line() uses it as the identifier and location of the line.
Tok last_ident_tok;
};
// Forward declarations of the file-local parser routines.
// The operand parsers are layered: 3 is the entry point, 2 handles binary
// operators by precedence, 1 handles unary prefixes, 0 handles atoms.
static PExpr* parser_parse_operand_3(Parser* parser);
static PExpr* parser_parse_operand_2(Parser* parser, int prec);
static PExpr* parser_parse_operand_1(Parser* parser);
static PExpr* parser_parse_operand_0(Parser* parser);
static void parser_skip_to_next_line(Parser* parser);
static void parser_report(Parser* parser, const char* msg, Loc loc);
static char literal_char_val(const char* str);
static char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok);
static char* parser_tok_strdup(const Parser* parser, Tok tok);
static bool parser_tok_streq(const Parser* parser, Tok tok, const char* text);
static bool parser_eat(Parser* parser, TokTy ty);
static bool parser_test(const Parser* parser, TokTy ty);
static void parser_step(Parser* parser);
Parser* parser_new(const char* filename, const char* text)
{
Parser* parser = malloc(sizeof(Parser));
*parser = (Parser) {
.lexer = {},
.tok = {},
.eaten = {},
.error_occured = false,
.label_fail = false,
.last_ident_tok = {},
};
lexer_init(&parser->lexer, filename, text);
parser->tok = lexer_next(&parser->lexer);
return parser;
}
// Releases a parser created by parser_new(). Only the Parser object itself
// is freed; the source text and filename are not touched.
void parser_free(Parser* parser)
{
free(parser);
}
// Reports whether the current token is the identifier `const`.
// Does not consume anything.
bool parser_next_is_const(Parser* parser)
{
    if (!parser_test(parser, TT_Ident)) {
        return false;
    }
    return parser_tok_streq(parser, parser->tok, "const");
}
// Reports whether the current token is the identifier `include`.
// Does not consume anything.
bool parser_next_is_include(Parser* parser)
{
    if (!parser_test(parser, TT_Ident)) {
        return false;
    }
    return parser_tok_streq(parser, parser->tok, "include");
}
// Parses a constant definition: the current token, an identifier, and a
// value expression.
//
// The current token is stepped over unconditionally; it is expected to be
// the `const` keyword the caller detected with parser_next_is_const().
//
// Returns a heap-allocated PConst (release with pconst_free()), or nullptr
// after an error has been reported. A statement is never returned with a
// null value expression.
PConst* parser_parse_const(Parser* parser)
{
    Loc loc = parser->tok.loc;
    parser_step(parser);
    if (!parser_eat(parser, TT_Ident)) {
        parser_report(parser, "expected identifier", parser->tok.loc);
        return nullptr;
    }
    char* ident = parser_tok_strdup(parser, parser->eaten);
    PExpr* value = parser_parse_operand_3(parser);
    if (!value) {
        // The operand parser has already reported the error. Do not leak the
        // name and do not hand out a half-built statement.
        free(ident);
        return nullptr;
    }
    PConst* stmt = malloc(sizeof(PConst));
    *stmt = (PConst) { loc, ident, value };
    return stmt;
}
// Parses an include directive: the current token followed by a string
// literal.
//
// The current token is stepped over unconditionally; it is expected to be
// the `include` keyword the caller detected with parser_next_is_include().
//
// Returns a heap-allocated PInclude holding the decoded string, or nullptr
// after reporting "expected string".
PInclude* parser_parse_include(Parser* parser)
{
    const Loc start = parser->tok.loc;
    parser_step(parser);
    if (parser_eat(parser, TT_Str)) {
        // The decoded length is required by parser_str_val() but not needed
        // here.
        size_t decoded_len;
        char* path = parser_str_val(parser, &decoded_len, parser->eaten);
        PInclude* node = malloc(sizeof(PInclude));
        *node = (PInclude) { start, path };
        return node;
    }
    parser_report(parser, "expected string", parser->tok.loc);
    return nullptr;
}
// Tries to parse one label: either `.name:` (local) or `name:` (non-local).
// Leading newlines are skipped first.
//
// Returns a heap-allocated PLabel on success. Returns nullptr when
//  - the parser is at end of input, or `label_fail` is already set;
//  - a syntax error was reported; or
//  - an identifier was consumed that is not followed by ':'. In that case no
//    error is reported: the identifier is kept in `last_ident_tok` and
//    `label_fail` is set.
PLabel* parser_parse_label(Parser* parser)
{
    if (parser->label_fail || parser_test(parser, TT_Eof)) {
        return nullptr;
    }
    parser_skip_newlines(parser);

    const Loc start = parser->tok.loc;
    const bool is_local = parser_eat(parser, '.');

    if (!is_local && !parser_eat(parser, TT_Ident)) {
        parser_report(parser, "expected identifier or ':'", parser->tok.loc);
        return nullptr;
    }

    if (is_local) {
        if (!parser_eat(parser, TT_Ident)) {
            parser_report(parser, "expected identifier", parser->tok.loc);
            return nullptr;
        }
        char* name = parser_tok_strdup(parser, parser->eaten);
        if (parser_eat(parser, ':')) {
            PLabel* node = malloc(sizeof(PLabel));
            *node = (PLabel) { start, name, .local = true };
            return node;
        }
        parser_report(parser, "expected ':'", parser->tok.loc);
        free(name);
        return nullptr;
    }

    // Non-local: remember the identifier before looking for the colon, so it
    // is still available when it turns out not to be a label.
    parser->last_ident_tok = parser->eaten;
    if (!parser_eat(parser, ':')) {
        parser->label_fail = true;
        return nullptr;
    }
    char* name = parser_tok_strdup(parser, parser->last_ident_tok);
    PLabel* node = malloc(sizeof(PLabel));
    *node = (PLabel) { start, name, .local = false };
    return node;
}
// Parses the operand list of one line and builds the PLine.
//
// The line's identifier is not read here: it is taken from
// `last_ident_tok`, the identifier parser_parse_label() consumed when it
// found no ':' after it. This function clears `label_fail`, which that
// attempt set, so that parser_parse_label() works again for the following
// line.
//
// Grammar handled here: [operand {',' operand}] (newline | end of input),
// with at most `max_ops_size` operands. Trailing blank lines are consumed.
//
// Returns a heap-allocated PLine (release with pline_free()), or nullptr
// after an error has been reported. On the operand errors the rest of the
// line is skipped and any operands parsed so far are freed.
PLine* parser_parse_line(Parser* parser)
{
    constexpr size_t max_ops_size = 2;
    PExpr* ops[max_ops_size];
    size_t ops_size = 0;

    // The pending identifier is being consumed by this call; without this
    // reset every later parser_parse_label() call would bail out early.
    parser->label_fail = false;

    while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
        if (ops_size > 0) {
            if (ops_size >= max_ops_size) {
                parser_report(parser,
                    "exceeded maximum number of operands (2)",
                    parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            if (!parser_eat(parser, ',')) {
                parser_report(parser, "expected ','", parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
        }
        PExpr* operand = parser_parse_operand_3(parser);
        if (!operand) {
            parser_skip_to_next_line(parser);
            goto error_free_ops;
        }
        ops[ops_size++] = operand;
    }

    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
        parser_report(parser, "expected newline", parser->tok.loc);
        goto error_free_ops;
    }
    parser_skip_newlines(parser);

    PLine* line = malloc(sizeof(PLine));
    *line = (PLine) {
        parser->last_ident_tok.loc,
        parser_tok_strdup(parser, parser->last_ident_tok),
        .ops = {},
        .ops_size = ops_size,
    };
    for (size_t i = 0; i < ops_size; ++i)
        line->ops[i] = ops[i];
    return line;

error_free_ops:
    for (size_t i = 0; i < ops_size; ++i)
        if (ops[i])
            pexpr_free(ops[i]);
    return nullptr;
}
// Precedence level at which binary-expression parsing starts. It equals the
// largest value in the op_precs table of parser_parse_operand_2(), i.e. the
// loosest-binding operator level.
static const int parser_binary_prec = 6;
// Entry point for parsing one operand.
//
// A leading '[' is rejected with an error asking for a 'u8'/'u16' prefix.
// Everything else is handed to the binary-expression parser.
//
// Returns the expression, or a null pointer after an error was reported.
PExpr* parser_parse_operand_3(Parser* parser)
{
    const Loc start = parser->tok.loc;

    if (parser_eat(parser, TT_LBracket)) {
        parser_report(parser, "expected 'u8' or 'u16' before '['", start);
        return NULL;
    }

    // NOTE(review): this condition can never hold as written. parser_eat('[')
    // inspects the same current token that was just tested to be TT_Ident, so
    // the memory-operand branch below is dead code. Presumably a width
    // identifier was meant to be consumed before the '[' — confirm intent.
    if (parser_test(parser, TT_Ident) && parser_eat(parser, '[')) {
        PExpr* inner = parser_parse_operand_2(parser, parser_binary_prec);
        if (parser_eat(parser, ']')) {
            PExpr* mem = malloc(sizeof(PExpr));
            *mem = (PExpr) {
                .ty = PExprTy_Mem,
                .loc = start,
                .operand = inner,
            };
            return mem;
        }
        parser_report(parser, "expected ']'", parser->tok.loc);
        pexpr_free(inner);
        return nullptr;
    }

    return parser_parse_operand_2(parser, parser_binary_prec);
}
// Parses a left-associative binary expression by precedence climbing.
//
// `prec` is the loosest operator level accepted by this call; level 0 is a
// unary expression. Both operands of an operator at this level are parsed
// one level tighter (`prec - 1`).
//
// Returns the expression tree, or nullptr if any operand failed to parse.
// The failing sub-parser has already reported the error, and the partial
// tree built so far is freed. A node with a null child is never returned.
PExpr* parser_parse_operand_2(Parser* parser, int prec)
{
    // Three parallel tables: expression type, token, precedence level.
    const PExprTy op_tys[] = {
        PExprTy_Or,
        PExprTy_Xor,
        PExprTy_And,
        PExprTy_Shr,
        PExprTy_Shl,
        PExprTy_Add,
        PExprTy_Sub,
        PExprTy_Mul,
        PExprTy_Div,
        PExprTy_Mod,
    };
    const TokTy op_tts[] = {
        '|',
        '^',
        '&',
        TT_DoubleGt,
        TT_DoubleLt,
        '+',
        '-',
        '*',
        '/',
        '%',
    };
    const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 };
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_tts) / sizeof(op_tts[0]),
        "misaligned");
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_precs) / sizeof(op_precs[0]),
        "misaligned");

    if (prec == 0) {
        return parser_parse_operand_1(parser);
    }

    PExpr* left = parser_parse_operand_2(parser, prec - 1);
    if (!left) {
        // Without this check a following operator would dereference
        // `left->loc` on a null pointer.
        return nullptr;
    }

    bool should_continue = true;
    while (should_continue) {
        should_continue = false;
        for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) {
            if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) {
                PExpr* right = parser_parse_operand_2(parser, prec - 1);
                if (!right) {
                    pexpr_free(left);
                    return nullptr;
                }
                PExpr* new_left = malloc(sizeof(PExpr));
                *new_left = (PExpr) {
                    .ty = op_tys[i],
                    .loc = left->loc,
                    .left = left,
                    .right = right,
                };
                left = new_left;
                should_continue = true;
                break;
            }
        }
    }
    return left;
}
// Parses a unary expression: any number of '-' (negate) or '!' (not)
// prefixes followed by an atom.
//
// Returns the expression, or nullptr if the operand failed to parse. The
// error has already been reported by the atom parser. A unary node with a
// null operand is never returned.
PExpr* parser_parse_operand_1(Parser* parser)
{
    Loc loc = parser->tok.loc;

    PExprTy ty;
    if (parser_eat(parser, '-')) {
        ty = PExprTy_Negate;
    } else if (parser_eat(parser, '!')) {
        ty = PExprTy_Not;
    } else {
        return parser_parse_operand_0(parser);
    }

    // Prefixes nest, so the operand is itself a unary expression.
    PExpr* operand = parser_parse_operand_1(parser);
    if (!operand) {
        return nullptr;
    }
    PExpr* expr = malloc(sizeof(PExpr));
    *expr = (PExpr) {
        .ty = ty,
        .loc = loc,
        .operand = operand,
    };
    return expr;
}
PExpr* parser_parse_operand_0(Parser* parser)
{
Loc loc = parser->tok.loc;
if (parser_eat(parser, TT_Ident)) {
char* ident = parser_tok_strdup(parser, parser->eaten);
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Ident,
.loc = loc,
.str = ident,
};
return expr;
} else if (parser_eat(parser, TT_Int)) {
char* str = parser_tok_strdup(parser, parser->eaten);
uint64_t val = strtoull(str, NULL, 10);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return nullptr;
}
uint16_t imm = (uint16_t)val;
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Imm,
.loc = loc,
.imm = imm,
};
return expr;
} else if (parser_eat(parser, TT_Binary)) {
char* str = parser_tok_strdup(parser, parser->eaten);
uint64_t val = strtoull(&str[2], NULL, 2);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return NULL;
}
uint16_t imm = (uint16_t)val;
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Imm,
.loc = loc,
.imm = imm,
};
return expr;
} else if (parser_eat(parser, TT_Hex)) {
char* str = parser_tok_strdup(parser, parser->eaten);
uint64_t val = strtoull(&str[2], NULL, 16);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return NULL;
}
uint16_t imm = (uint16_t)val;
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Imm,
.loc = loc,
.imm = imm,
};
return expr;
} else if (parser_eat(parser, TT_Char)) {
char* str = parser_tok_strdup(parser, parser->eaten);
uint16_t imm = (uint16_t)literal_char_val(&str[1]);
free(str);
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Imm,
.loc = loc,
.imm = imm,
};
return expr;
} else if (parser_eat(parser, TT_Str)) {
size_t str_len;
char* str = parser_str_val(parser, &str_len, parser->eaten);
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_Str,
.loc = loc,
.str = str,
};
return expr;
} else if (parser_eat(parser, '.')) {
if (!parser_eat(parser, TT_Ident)) {
parser_report(parser, "expected identifier", parser->tok.loc);
return NULL;
}
char* ident = parser_tok_strdup(parser, parser->eaten);
PExpr* expr = malloc(sizeof(PExpr));
*expr = (PExpr) {
.ty = PExprTy_SubLabel,
.loc = loc,
.str = ident,
};
return expr;
} else if (parser_eat(parser, '(')) {
PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec);
if (!parser_eat(parser, ')')) {
parser_report(parser, "expected ')'", parser->tok.loc);
pexpr_free(operand);
return NULL;
}
return operand;
} else {
parser_report(parser, "expected operand", parser->tok.loc);
return NULL;
}
}
// Error recovery: discards tokens up to and including the next newline, or
// up to end of input if no newline follows.
void parser_skip_to_next_line(Parser* parser)
{
    for (;;) {
        if (parser_done(parser)) {
            return;
        }
        if (parser_eat(parser, TT_Newline)) {
            return;
        }
        parser_step(parser);
    }
}
// True once either the parser or its lexer has reported an error.
bool parser_error_occured(const Parser* parser)
{
    if (parser->error_occured) {
        return true;
    }
    return parser->lexer.error_occured;
}
// Consumes every consecutive newline token at the current position.
void parser_skip_newlines(Parser* parser)
{
    bool ate_newline;
    do {
        ate_newline = parser_eat(parser, '\n');
    } while (ate_newline);
}
// Prints `msg` as an error to stderr, followed by a rendering of `loc`
// against the source text, and marks the parser as having failed.
void parser_report(Parser* parser, const char* msg, Loc loc)
{
    const Lexer* source = &parser->lexer;

    fprintf(stderr, FMT_ERROR("%s"), msg);
    loc_pretty_print(loc, source->text, source->text_len);
    parser->error_occured = true;
}
// Decodes the contents of a string-literal token.
//
// `tok` is expected to be a TT_Str token, whose text includes both quote
// characters. Escape sequences are decoded with literal_char_val(); a
// backslash and the character after it together produce one output
// character.
//
// Returns a newly allocated, NUL-terminated buffer that the caller must
// free(), and stores the number of decoded characters in `*str_len`.
char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok)
{
    char* lit = parser_tok_strdup(parser, tok);
    // tok.len - 2 content characters at most, plus the terminating NUL that
    // calloc's zero fill provides.
    char* str = calloc(tok.len - 1, sizeof(char));
    const size_t closing_quote = tok.len - 1;
    size_t out_len = 0;
    for (size_t i = 1; i < closing_quote; ++i) {
        str[out_len++] = literal_char_val(&lit[i]);
        // Step over the escaped character too, so it is not emitted a second
        // time as an ordinary character.
        if (lit[i] == '\\' && i + 1 < closing_quote) {
            ++i;
        }
    }
    *str_len = out_len;
    free(lit);
    return str;
}
// Returns the value of the (possibly escaped) character at the start of
// `str`. Recognised escapes are \0, \t and \n; a backslash followed by any
// other character yields that character unchanged. For a backslash, str[1]
// is read as well.
char literal_char_val(const char* str)
{
    const char first = str[0];
    if (first != '\\') {
        return first;
    }

    const char escaped = str[1];
    if (escaped == '0') {
        return 0;
    }
    if (escaped == 't') {
        return '\t';
    }
    if (escaped == 'n') {
        return '\n';
    }
    return escaped;
}
// Returns a newly allocated, NUL-terminated copy of the source text covered
// by `tok`. The caller owns the result and must free() it.
char* parser_tok_strdup(const Parser* parser, Tok tok)
{
    const char* first = parser->lexer.text + tok.loc.idx;
    return strndup(first, tok.len);
}
// Reports whether the source text covered by `tok` is exactly `text`.
bool parser_tok_streq(const Parser* parser, Tok tok, const char* text)
{
    if (tok.len != strlen(text)) {
        return false;
    }
    const char* first = parser->lexer.text + tok.loc.idx;
    return strncmp(first, text, tok.len) == 0;
}
// If the current token has type `ty`, records it in `eaten`, advances to the
// next token and returns true. Otherwise leaves the parser untouched and
// returns false.
bool parser_eat(Parser* parser, TokTy ty)
{
    if (!parser_test(parser, ty)) {
        return false;
    }
    parser->eaten = parser->tok;
    parser_step(parser);
    return true;
}
// Reports whether the current token has type `ty`, without consuming it.
bool parser_test(const Parser* parser, TokTy ty)
{
    return ty == parser->tok.ty;
}
// Replaces the current token with the next one from the lexer. `eaten` is
// not updated.
void parser_step(Parser* parser)
{
    Lexer* source = &parser->lexer;
    parser->tok = lexer_next(source);
}
// Reports whether the current token is the end-of-input token.
bool parser_done(const Parser* parser)
{
    return parser_test(parser, TT_Eof);
}
// Positions `lexer` at the start of `text` (line 1, column 1) with no error
// recorded. `text` must be NUL-terminated; neither it nor `filename` is
// copied.
void lexer_init(Lexer* lexer, const char* filename, const char* text)
{
    lexer->filename = filename;
    lexer->text = text;
    lexer->text_len = strlen(text);
    lexer->idx = 0;
    lexer->line = 1;
    lexer->col = 1;
    lexer->ch = text[0];
    lexer->error_occured = false;
}
// Scans and returns the next token.
//
// Spaces, tabs and ';' comments (which run to the end of the line) are
// skipped; newlines are returned as tokens. On malformed or illegal input
// an error is reported through lexer_report() and a TT_Err token is
// returned. At end of input a TT_Eof token is returned on every call.
Tok lexer_next(Lexer* lexer)
{
const char* ident_chars = "abcdefghijklmnopqrstuvwxyz"
"ABCDEFGHIJKLMNOPQRSTUVWXYZ_$";
const char* int_chars = "1234567890";
// NOTE(review): '8' appears twice in this set. It is harmless for
// membership tests but looks like a typo.
const char* hex_chars = "01234567889abcdefABCDEF";
Loc loc = lexer_loc(lexer);
if (lexer_done(lexer)) {
return lexer_tok(lexer, TT_Eof, loc);
}
if (lexer->ch == '\n') {
lexer_step(lexer);
return lexer_tok(lexer, '\n', loc);
} else if (str_includes(" \t", lexer->ch)) {
// Skip the whitespace run, then scan again from the new position.
while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) {
lexer_step(lexer);
}
return lexer_next(lexer);
} else if (str_includes(ident_chars, lexer->ch)) {
// Identifier: starts with a letter, '_' or '$'; digits allowed after.
while (!lexer_done(lexer)
&& (str_includes(ident_chars, lexer->ch)
|| str_includes(int_chars, lexer->ch))) {
lexer_step(lexer);
}
return lexer_tok(lexer, TT_Ident, loc);
} else if (str_includes(int_chars, lexer->ch) && lexer->ch != '0') {
// Decimal literal starting with a non-zero digit.
while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) {
lexer_step(lexer);
}
return lexer_tok(lexer, TT_Int, loc);
} else if (lexer->ch == ';') {
// Comment: discard up to (not including) the newline and scan again.
while (!lexer_done(lexer) && lexer->ch != '\n') {
lexer_step(lexer);
}
return lexer_next(lexer);
} else if (lexer->ch == '0') {
// A leading '0' is a binary prefix, a hex prefix, or the integer 0 on its
// own. Digits directly after a plain '0' are not part of this token.
lexer_step(lexer);
if (lexer->ch == 'b') {
lexer_step(lexer);
if (lexer_done(lexer) || !str_includes("01", lexer->ch)) {
lexer_report(lexer, "malformed binary literal", loc);
return lexer_tok(lexer, TT_Err, loc);
}
while (!lexer_done(lexer) && str_includes("01", lexer->ch)) {
lexer_step(lexer);
}
return lexer_tok(lexer, TT_Binary, loc);
} else if (lexer->ch == 'x') {
lexer_step(lexer);
if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) {
lexer_report(lexer, "malformed hex literal", loc);
return lexer_tok(lexer, TT_Err, loc);
}
while (!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) {
lexer_step(lexer);
}
return lexer_tok(lexer, TT_Hex, loc);
} else {
return lexer_tok(lexer, TT_Int, loc);
}
} else if (lexer->ch == '\'') {
// Character literal: quote, one (possibly escaped) character, quote.
lexer_step(lexer);
lexer_skip_literal_char(lexer);
if (lexer_done(lexer) || lexer->ch != '\'') {
lexer_report(lexer, "malformed character literal", loc);
return lexer_tok(lexer, TT_Err, loc);
}
lexer_step(lexer);
return lexer_tok(lexer, TT_Char, loc);
} else if (lexer->ch == '"') {
// String literal: (possibly escaped) characters up to the closing quote.
lexer_step(lexer);
while (!lexer_done(lexer) && lexer->ch != '"') {
lexer_skip_literal_char(lexer);
}
if (lexer_done(lexer) || lexer->ch != '"') {
lexer_report(lexer, "malformed string literal", loc);
return lexer_tok(lexer, TT_Err, loc);
}
lexer_step(lexer);
return lexer_tok(lexer, TT_Str, loc);
} else if (lexer->ch == '<') {
// Only "<<" is a token; a single '<' is an error.
lexer_step(lexer);
if (!lexer_done(lexer) && lexer->ch == '<') {
lexer_step(lexer);
return lexer_tok(lexer, TT_DoubleLt, loc);
} else {
lexer_report(lexer, "expected '<'", loc);
return lexer_tok(lexer, TT_Err, loc);
}
} else if (lexer->ch == '>') {
// Only ">>" is a token; a single '>' is an error.
lexer_step(lexer);
if (!lexer_done(lexer) && lexer->ch == '>') {
lexer_step(lexer);
return lexer_tok(lexer, TT_DoubleGt, loc);
} else {
lexer_report(lexer, "expected '>'", loc);
return lexer_tok(lexer, TT_Err, loc);
}
} else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) {
// Single-character punctuation: the token type is the character itself.
char ch = lexer->ch;
lexer_step(lexer);
return lexer_tok(lexer, (TokTy)ch, loc);
} else {
// Report first, then step past the offending character so scanning can
// continue.
lexer_report(lexer, "illegal character", loc);
lexer_step(lexer);
return lexer_tok(lexer, TT_Err, loc);
}
}
// Advances past one character of a character/string literal; a backslash
// and the character after it count as one.
// Returns 0 normally, or -1 when a backslash is the last character of the
// input.
int lexer_skip_literal_char(Lexer* lexer)
{
    const bool is_escape = lexer->ch == '\\';
    lexer_step(lexer);
    if (!is_escape) {
        return 0;
    }
    if (lexer_done(lexer)) {
        return -1;
    }
    lexer_step(lexer);
    return 0;
}
// Moves the cursor one character forward, keeping line and column in sync.
// Does nothing once the end of the text has been reached.
void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer)) {
        return;
    }

    // Leaving a newline starts a new line at column 1.
    const bool leaving_newline = lexer->ch == '\n';
    if (leaving_newline) {
        lexer->line += 1;
    }
    lexer->col = leaving_newline ? 1 : lexer->col + 1;

    lexer->idx += 1;
    lexer->ch = lexer->text[lexer->idx];
}
// Prints `msg` as an error to stderr, followed by a rendering of `loc`
// against the source text, and marks the lexer as having failed.
void lexer_report(Lexer* lexer, const char* msg, Loc loc)
{
    fprintf(stderr, FMT_ERROR("%s"), msg);
    loc_pretty_print(loc, lexer->text, lexer->text_len);
    lexer->error_occured = true;
}
Loc lexer_loc(const Lexer* lexer)
{
return (Loc) {
.filename = lexer->filename,
.idx = lexer->idx,
.line = lexer->line,
.col = lexer->col,
};
}
// Reports whether the cursor is at (or past) the end of the text.
bool lexer_done(const Lexer* lexer)
{
    return !(lexer->idx < lexer->text_len);
}
// Builds a token of type `ty` that starts at `loc` and extends up to the
// lexer's current position.
Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc)
{
    Tok tok = {
        .ty = ty,
        .loc = loc,
        .len = lexer->idx - loc.idx,
    };
    return tok;
}
// Reports whether `ch` occurs in the NUL-terminated string `str`.
// The terminator itself never matches, so looking for '\0' yields false.
bool str_includes(const char* str, char ch)
{
    const char* cursor = str;
    while (*cursor != '\0') {
        if (*cursor == ch) {
            return true;
        }
        ++cursor;
    }
    return false;
}
// Recursively frees an expression tree: owned strings, child expressions and
// the node itself.
//
// A null `expr` is accepted and ignored, like free(). The parser can reach
// this function with a null pointer when a sub-expression failed to parse.
void pexpr_free(PExpr* expr)
{
    if (!expr) {
        return;
    }
    switch (expr->ty) {
        case PExprTy_Err:
        case PExprTy_Imm:
            break;
        case PExprTy_Ident:
        case PExprTy_SubLabel:
        case PExprTy_Str:
            free(expr->str);
            break;
        case PExprTy_Mem:
        case PExprTy_Not:
        case PExprTy_Negate:
            pexpr_free(expr->operand);
            break;
        case PExprTy_Or:
        case PExprTy_Xor:
        case PExprTy_And:
        case PExprTy_Shl:
        case PExprTy_Shr:
        case PExprTy_Add:
        case PExprTy_Sub:
        case PExprTy_Mul:
        case PExprTy_Div:
        case PExprTy_Mod:
            pexpr_free(expr->left);
            pexpr_free(expr->right);
            break;
    }
    free(expr);
}
// Frees a constant definition together with its name and value expression.
// A null `stmt` is accepted and ignored, like free(); a null value
// expression is skipped.
void pconst_free(PConst* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    if (stmt->value) {
        pexpr_free(stmt->value);
    }
    free(stmt);
}
// Frees an include directive together with its filename string.
// A null `stmt` is accepted and ignored, like free().
void pinclude_free(PInclude* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->filename);
    free(stmt);
}
// Frees a label together with its name.
// A null `stmt` is accepted and ignored, like free().
void plabel_free(PLabel* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    free(stmt);
}
// Frees a parsed line together with its identifier and its operand
// expressions.
// A null `stmt` is accepted and ignored, like free(); null operand slots are
// skipped.
void pline_free(PLine* stmt)
{
    if (!stmt) {
        return;
    }
    free(stmt->ident);
    for (size_t i = 0; i < stmt->ops_size; ++i) {
        if (stmt->ops[i]) {
            pexpr_free(stmt->ops[i]);
        }
    }
    free(stmt);
}