vc3/asm/main.c
2025-04-02 16:13:31 +02:00

2220 lines
72 KiB
C

#include "asm/asm.h"
#include "common/arch.h"
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// Reports whether `ch` occurs in the NUL-terminated string `str`.
// The terminator itself is never compared, so `ch == '\0'` yields false.
static inline bool str_includes(const char* str, char ch)
{
    const char* cursor = str;
    while (*cursor != '\0') {
        if (*cursor == ch) {
            return true;
        }
        cursor++;
    }
    return false;
}
// Returns a freshly allocated, NUL-terminated copy of `str`.
// The caller owns the result and releases it with free().
static inline char* asm_strdup(const char* str)
{
    size_t size = strlen(str) + 1;
    char* copy = malloc(size);
    // Copies the terminator along with the characters.
    memcpy(copy, str, size);
    return copy;
}
// Returns a freshly allocated buffer of `len + 1` bytes holding at most
// `len` characters of `str`, always NUL-terminated. If `str` is shorter
// than `len`, the remainder of the buffer is zero-filled.
// The caller owns the result and releases it with free().
static inline char* asm_strndup(const char* str, size_t len)
{
    char* copy = malloc(len + 1);
    // strncpy writes exactly `len` bytes (zero-padding short sources).
    strncpy(copy, str, len);
    copy[len] = '\0';
    return copy;
}
// A position inside the source text being assembled.
typedef struct {
// Byte offset into the text (copied from the lexer's `idx`).
size_t idx;
// Line number; the lexer starts counting at 1.
int line;
// Column number; the lexer starts at 1 and resets to 1 after each '\n'.
int col;
} Loc;
// Diagnostic helpers. Each prints a coloured severity tag, the formatted
// message, a colour reset and a newline to stderr.
// FMT must be a string literal (it is concatenated with the prefix), and at
// least one variadic argument has to be supplied because `__VA_ARGS__`
// follows a comma; callers with a plain message pass "%s" plus the message.

// Prints an "error: ..." diagnostic.
#define REPORTF_ERROR(FMT, ...) \
(fprintf( \
stderr, "\x1b[1;91merror\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__))
// Prints an "info: ..." diagnostic.
#define REPORTF_INFO(FMT, ...) \
(fprintf(stderr, "\x1b[1;96minfo\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__))
// Prints a "warning: ..." diagnostic.
#define REPORTF_WARNING(FMT, ...) \
(fprintf( \
stderr, "\x1b[1;93mwarning\x1b[1;97m: " FMT "\x1b[0m\n", __VA_ARGS__))
// Prints a source excerpt for a diagnostic to stderr: a "--> file:line:col"
// header, the source line containing `loc`, and a caret under `loc.col`.
//
// filename  name shown in the header
// text      the source text; `text_len` is its length in bytes
// loc       position to point at; `loc.idx` may equal `text_len` (end of
//           input) or refer to a '\n', in which case the line that the
//           newline terminates is shown
static inline void print_report_loc(
    const char* filename, const char* text, size_t text_len, Loc loc)
{
    // Clamp so a location at (or past) the end of input stays in bounds.
    size_t idx = loc.idx < text_len ? loc.idx : text_len;

    // Walk back to the character right after the previous newline. Looking
    // at `line_start - 1` means a location sitting on a '\n' still selects
    // the line it ends instead of an empty one.
    size_t line_start = idx;
    while (line_start > 0 && text[line_start - 1] != '\n') {
        line_start -= 1;
    }
    size_t line_end = idx;
    while (line_end < text_len && text[line_end] != '\n') {
        line_end += 1;
    }

    const char* line = &text[line_start];
    int line_len = (int)(line_end - line_start);
    // `%*s` with an empty string emits exactly `caret_pad` spaces, including
    // zero of them for column 1 (`%*c` would always emit at least one).
    int caret_pad = loc.col > 1 ? loc.col - 1 : 0;
    fprintf(stderr,
        " \x1b[96m--> ./%s:%d:%d\n "
        "\x1b[37m|\n\x1b[96m%5d\x1b[37m|%.*s\n "
        "|%*s\x1b[1;91m^\x1b[0m\n",
        filename,
        loc.line,
        loc.col,
        loc.line,
        line_len,
        line,
        caret_pad,
        "");
}
// Bundles what is needed to print a diagnostic with a source excerpt.
typedef struct {
// File name shown in diagnostics.
const char* filename;
// The source text that locations refer to.
const char* text;
// Length of `text` in bytes.
size_t text_len;
} Reporter;
// Prints the source excerpt for `loc` using the file name and text held
// by `rep`.
void reporter_print_loc(Reporter* rep, Loc loc)
{
    const char* filename = rep->filename;
    const char* text = rep->text;
    size_t text_len = rep->text_len;
    print_report_loc(filename, text, text_len, loc);
}
// Prints `msg` as an error diagnostic, followed by the source excerpt that
// `loc` points at.
void reporter_error_with_loc(Reporter* rep, const char* msg, Loc loc)
{
    REPORTF_ERROR("%s", msg);
    print_report_loc(rep->filename, rep->text, rep->text_len, loc);
}
// Token kinds produced by the lexer.
// Multi-character kinds get small sequential values; single-character
// tokens use their own character code, which lets the lexer cast the
// character straight to a TokTy.
typedef enum {
// Lexing failed at this position (an error has been reported).
TT_Err,
// End of input.
TT_Eof,
TT_Ident,
// Decimal integer literal.
TT_Int,
// "0b..." literal.
TT_Binary,
// "0x..." literal.
TT_Hex,
// Single-quoted character literal.
TT_Char,
// Double-quoted string literal.
TT_Str,
TT_Newline = '\n',
// These two continue counting after '\n', i.e. they take 11 and 12.
TT_DoubleLt,
TT_DoubleGt,
TT_Pipe = '|',
TT_Hat = '^',
TT_Ampersand = '&',
TT_Plus = '+',
TT_Minus = '-',
TT_Asterisk = '*',
TT_Slash = '/',
TT_Percent = '%',
TT_LParen = '(',
TT_RParen = ')',
TT_LBracket = '[',
TT_RBracket = ']',
TT_Dot = '.',
TT_Comma = ',',
TT_Colon = ':',
TT_Exclamation = '!',
} TokTy;
// A lexed token. The token text is not stored; it is the `len` bytes of the
// source starting at `loc.idx`.
typedef struct {
TokTy ty;
// Where the token starts.
Loc loc;
// Number of source bytes the token spans.
size_t len;
} Tok;
// Lexer state: a cursor over a NUL-terminated source text.
typedef struct {
// File name used in diagnostics.
const char* filename;
// The source text and its length (computed with strlen on construction).
const char* text;
size_t text_len;
// Byte offset of the current character.
size_t idx;
// Line and column of the current character, both starting at 1.
int line;
int col;
// Cached copy of text[idx].
char ch;
// Set once any lexing error has been reported.
bool error_occured;
} Lexer;
// Initializes `lexer` to read `text` from its first character, at line 1,
// column 1. `text` must be NUL-terminated; neither string is copied.
void lexer_construct(Lexer* lexer, const char* filename, const char* text)
{
    lexer->filename = filename;
    lexer->text = text;
    lexer->text_len = strlen(text);
    lexer->idx = 0;
    lexer->line = 1;
    lexer->col = 1;
    lexer->ch = text[0];
    lexer->error_occured = false;
}
// True once the cursor has moved past the last character of the text.
static inline bool lexer_done(const Lexer* lexer)
{
    const size_t remaining_from = lexer->idx;
    return !(remaining_from < lexer->text_len);
}
// Advances the cursor by one character and keeps line/column in sync.
// Does nothing once the end of the text has been reached.
static inline void lexer_step(Lexer* lexer)
{
    if (lexer_done(lexer)) {
        return;
    }
    // Decide on the character being left, before it is overwritten.
    const bool leaving_newline = lexer->ch == '\n';
    lexer->idx += 1;
    lexer->ch = lexer->text[lexer->idx];
    if (leaving_newline) {
        lexer->line += 1;
        lexer->col = 1;
    } else {
        lexer->col += 1;
    }
}
static inline Loc lexer_loc(const Lexer* lexer)
{
return (Loc) { .idx = lexer->idx, .line = lexer->line, .col = lexer->col };
}
// Builds a token of kind `ty` that starts at `loc` and ends just before the
// lexer's current position.
static inline Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc)
{
    Tok tok;
    tok.ty = ty;
    tok.loc = loc;
    tok.len = lexer->idx - loc.idx;
    return tok;
}
// Steps over one character of a char/string literal, treating a backslash
// plus the following character as a single unit.
// Returns 0 on success, or -1 if the input ends right after a backslash.
static inline int lexer_skip_literal_char(Lexer* lexer)
{
    const bool is_escape = lexer->ch == '\\';
    lexer_step(lexer);
    if (!is_escape) {
        return 0;
    }
    if (lexer_done(lexer)) {
        return -1;
    }
    lexer_step(lexer);
    return 0;
}
// Reports a lexing error at `loc` on stderr and marks the lexer as failed.
static inline void lexer_report(Lexer* lexer, const char* msg, Loc loc)
{
    const char* filename = lexer->filename;
    const char* text = lexer->text;
    REPORTF_ERROR("%s", msg);
    print_report_loc(filename, text, lexer->text_len, loc);
    lexer->error_occured = true;
}
// Produces the next token.
//
// Spaces, tabs and ';' comments (running up to, but not including, the end
// of the line) are skipped. Newlines are tokens of their own. Malformed
// input is reported through lexer_report and yields a TT_Err token; at end
// of input a TT_Eof token is returned.
Tok lexer_next(Lexer* lexer)
{
    const char* ident_chars = "abcdefghijklmnopqrstuvwxyz"
                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$";
    const char* int_chars = "1234567890";
    // The repeated '8' has no effect on membership tests.
    const char* hex_chars = "01234567889abcdefABCDEF";

    // Skip blanks and comments until something token-worthy shows up.
    for (;;) {
        if (lexer_done(lexer)) {
            return lexer_tok(lexer, TT_Eof, lexer_loc(lexer));
        }
        if (str_includes(" \t", lexer->ch)) {
            lexer_step(lexer);
        } else if (lexer->ch == ';') {
            while (!lexer_done(lexer) && lexer->ch != '\n') {
                lexer_step(lexer);
            }
        } else {
            break;
        }
    }

    const Loc start = lexer_loc(lexer);
    const char first = lexer->ch;

    if (first == '\n') {
        lexer_step(lexer);
        return lexer_tok(lexer, TT_Newline, start);
    }

    if (str_includes(ident_chars, first)) {
        do {
            lexer_step(lexer);
        } while (!lexer_done(lexer)
            && (str_includes(ident_chars, lexer->ch)
                || str_includes(int_chars, lexer->ch)));
        return lexer_tok(lexer, TT_Ident, start);
    }

    // Decimal literal that does not start with '0'.
    if (first != '0' && str_includes(int_chars, first)) {
        do {
            lexer_step(lexer);
        } while (!lexer_done(lexer) && str_includes(int_chars, lexer->ch));
        return lexer_tok(lexer, TT_Int, start);
    }

    // A leading '0' is either a lone zero or a "0b"/"0x" prefix.
    if (first == '0') {
        lexer_step(lexer);
        const char* digits;
        const char* malformed_msg;
        TokTy literal_ty;
        if (lexer->ch == 'b') {
            digits = "01";
            malformed_msg = "malformed binary literal";
            literal_ty = TT_Binary;
        } else if (lexer->ch == 'x') {
            digits = hex_chars;
            malformed_msg = "malformed hex literal";
            literal_ty = TT_Hex;
        } else {
            return lexer_tok(lexer, TT_Int, start);
        }
        lexer_step(lexer);
        if (lexer_done(lexer) || !str_includes(digits, lexer->ch)) {
            lexer_report(lexer, malformed_msg, start);
            return lexer_tok(lexer, TT_Err, start);
        }
        do {
            lexer_step(lexer);
        } while (!lexer_done(lexer) && str_includes(digits, lexer->ch));
        return lexer_tok(lexer, literal_ty, start);
    }

    if (first == '\'') {
        lexer_step(lexer);
        lexer_skip_literal_char(lexer);
        if (lexer_done(lexer) || lexer->ch != '\'') {
            lexer_report(lexer, "malformed character literal", start);
            return lexer_tok(lexer, TT_Err, start);
        }
        lexer_step(lexer);
        return lexer_tok(lexer, TT_Char, start);
    }

    if (first == '"') {
        lexer_step(lexer);
        while (!lexer_done(lexer) && lexer->ch != '"') {
            lexer_skip_literal_char(lexer);
        }
        if (lexer_done(lexer) || lexer->ch != '"') {
            lexer_report(lexer, "malformed string literal", start);
            return lexer_tok(lexer, TT_Err, start);
        }
        lexer_step(lexer);
        return lexer_tok(lexer, TT_Str, start);
    }

    // '<' and '>' are only valid doubled ("<<" and ">>").
    if (first == '<' || first == '>') {
        lexer_step(lexer);
        if (!lexer_done(lexer) && lexer->ch == first) {
            lexer_step(lexer);
            return lexer_tok(
                lexer, first == '<' ? TT_DoubleLt : TT_DoubleGt, start);
        }
        lexer_report(
            lexer, first == '<' ? "expected '<'" : "expected '>'", start);
        return lexer_tok(lexer, TT_Err, start);
    }

    // Single-character tokens use their character code as token kind.
    if (str_includes("|^&+-*/%()[].,:!", first)) {
        lexer_step(lexer);
        return lexer_tok(lexer, (TokTy)first, start);
    }

    lexer_report(lexer, "illegal character", start);
    lexer_step(lexer);
    return lexer_tok(lexer, TT_Err, start);
}
// A parsed label, kept as a singly linked list. parser_parse_labels pushes
// each new label at the front, so the list is in reverse source order.
typedef struct PLabel PLabel;
struct PLabel {
// Remaining labels, or NULL.
PLabel* next;
// Label name; owned by the node and freed in plabel_free.
char* ident;
// Where the label starts in the source.
Loc loc;
// True for labels written with a leading '.'.
bool sub_label;
};
// Allocates a label node in front of `next`. Takes ownership of `ident`.
PLabel* plabel_new(PLabel* next, char* ident, bool sub_label, Loc loc)
{
    PLabel* node = malloc(sizeof *node);
    node->next = next;
    node->ident = ident;
    node->loc = loc;
    node->sub_label = sub_label;
    return node;
}
// Frees a whole label list, including every node's name.
// Passing NULL is allowed and does nothing.
void plabel_free(PLabel* label)
{
    while (label != NULL) {
        PLabel* rest = label->next;
        free(label->ident);
        free(label);
        label = rest;
    }
}
// Kinds of parsed operand nodes. Which POperand union member is valid
// depends on the kind (see poperand_free for the grouping).
typedef enum {
// Leaves.
PoTy_Reg,
PoTy_Imm,
// Text-carrying nodes (use `str` / `str_len`).
PoTy_Ident,
PoTy_SubLabel,
PoTy_Str,
// Single-child nodes (use `operand`): u8[...] / u16[...] indirections
// and the unary operators '!' and '-'.
PoTy_Mem8,
PoTy_Mem16,
PoTy_Not,
PoTy_Negate,
// Binary operators (use `left` / `right`).
PoTy_Or,
PoTy_Xor,
PoTy_And,
PoTy_Shl,
PoTy_Shr,
PoTy_Add,
PoTy_Sub,
PoTy_Mul,
PoTy_Div,
PoTy_Mod,
} POperandTy;
// A node of a parsed operand expression tree. Nodes own their children and
// their string; poperand_free releases the whole tree.
typedef struct POperand POperand;
struct POperand {
POperandTy ty;
// Source position the node was parsed at.
Loc loc;
union {
// PoTy_Reg
Reg reg;
// PoTy_Imm
uint16_t imm;
// PoTy_Ident, PoTy_SubLabel, PoTy_Str
struct {
char* str;
size_t str_len;
};
// PoTy_Mem8, PoTy_Mem16, PoTy_Not, PoTy_Negate
POperand* operand;
// Binary operators
struct {
POperand* left;
POperand* right;
};
};
};
// Allocates a register operand node.
POperand* poperand_new_reg(Reg reg, Loc loc)
{
    const POperand init = { .ty = PoTy_Reg, .loc = loc, .reg = reg };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
// Allocates an immediate operand node holding `imm`.
POperand* poperand_new_imm(uint16_t imm, Loc loc)
{
    const POperand init = { .ty = PoTy_Imm, .loc = loc, .imm = imm };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
// Allocates a text-carrying operand node of kind `ty` (identifier,
// sub-label or string). Takes ownership of `str`; `str_len` is stored
// alongside it unchanged.
POperand* poperand_new_str(POperandTy ty, char* str, size_t str_len, Loc loc)
{
    const POperand init = {
        .ty = ty,
        .loc = loc,
        .str = str,
        .str_len = str_len,
    };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
// Allocates a single-child operand node of kind `ty`.
// Takes ownership of `inner`.
POperand* poperand_new_unary(POperandTy ty, POperand* inner, Loc loc)
{
    const POperand init = { .ty = ty, .loc = loc, .operand = inner };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
// Allocates a binary operator node of kind `ty`.
// Takes ownership of both children.
POperand* poperand_new_binary(
    POperandTy ty, POperand* left, POperand* right, Loc loc)
{
    const POperand init = {
        .ty = ty,
        .loc = loc,
        .left = left,
        .right = right,
    };
    POperand* node = malloc(sizeof *node);
    *node = init;
    return node;
}
// Frees an operand tree: the node itself, its string (for text-carrying
// kinds) and, recursively, its children.
// Passing NULL is allowed and does nothing, matching plabel_free; parser
// error paths can end up holding a NULL operand.
void poperand_free(POperand* operand)
{
    if (operand == NULL) {
        return;
    }
    switch (operand->ty) {
        case PoTy_Reg:
        case PoTy_Imm:
            break;
        case PoTy_Ident:
        case PoTy_SubLabel:
        case PoTy_Str:
            free(operand->str);
            break;
        case PoTy_Mem8:
        case PoTy_Mem16:
        case PoTy_Not:
        case PoTy_Negate:
            poperand_free(operand->operand);
            break;
        case PoTy_Or:
        case PoTy_Xor:
        case PoTy_And:
        case PoTy_Shl:
        case PoTy_Shr:
        case PoTy_Add:
        case PoTy_Sub:
        case PoTy_Mul:
        case PoTy_Div:
        case PoTy_Mod:
            poperand_free(operand->left);
            poperand_free(operand->right);
            break;
    }
    free(operand);
}
// One parsed source line: optional labels, a mnemonic and its operands.
// Allocated with room for `ops_size` trailing operand pointers.
typedef struct {
// Labels attached to this line (may be NULL); owned by the line.
PLabel* labels;
// Mnemonic text; owned by the line. pline_assemble compares it against
// mnemonic_str.
char* op;
// Source position of the mnemonic.
Loc loc;
// Number of entries in `ops`.
size_t ops_size;
// Operand trees; owned by the line.
POperand* ops[];
} PLine;
// Allocates a line holding `ops_size` operands copied from the `ops` array.
// Takes ownership of `op`, `labels` and the operand trees; the `ops` array
// itself is only read and stays with the caller.
PLine* pline_new(
    char* op, PLabel* labels, Loc loc, size_t ops_size, POperand** ops)
{
    PLine* line = malloc(sizeof *line + ops_size * sizeof line->ops[0]);
    line->labels = labels;
    line->op = op;
    line->loc = loc;
    line->ops_size = ops_size;
    for (size_t remaining = ops_size; remaining > 0; --remaining) {
        line->ops[remaining - 1] = ops[remaining - 1];
    }
    return line;
}
// Frees a line together with its labels, mnemonic and operand trees.
void pline_free(PLine* pline)
{
    const size_t count = pline->ops_size;
    for (size_t k = 0; k != count; ++k) {
        poperand_free(pline->ops[k]);
    }
    free(pline->op);
    plabel_free(pline->labels);
    free(pline);
}
// Statement kinds.
// NOTE(review): not referenced in the visible part of this file; the names
// suggest assembler directives next to plain lines — confirm against users.
typedef enum {
PStmtTy_Line,
PStmtTy_Global,
PStmtTy_Extern,
PStmtTy_Include,
PStmtTy_Define,
} PStmtTy;
// Parser state: a lexer plus one token of lookahead.
typedef struct {
Lexer lexer;
// Current (not yet consumed) token.
Tok tok;
// Token most recently consumed by parser_eat.
Tok eaten;
// Set once any parse error has been reported.
bool error_occured;
} Parser;
// Initializes `parser` to read `text` and loads the first token into
// `parser->tok`. `filename` is only used for diagnostics; neither string is
// copied.
void parser_construct(Parser* parser, const char* filename, const char* text)
{
    *parser = (Parser) {
        .eaten = (Tok) { 0 },
        .error_occured = false,
    };
    // Build the lexer in place and only then pull the first token from it.
    // Copying a local lexer and calling lexer_next on it inside one
    // initializer list leaves the order of the copy and the advance
    // unspecified, so the stored lexer could still sit before the first
    // token.
    lexer_construct(&parser->lexer, filename, text);
    parser->tok = lexer_next(&parser->lexer);
}
bool parser_done(const Parser* parser)
{
return parser->tok.ty == TT_Eof;
}
// True if either the parser or its lexer has reported an error.
bool parser_error_occured(const Parser* parser)
{
    if (parser->error_occured) {
        return true;
    }
    return parser->lexer.error_occured;
}
// Replaces the current token with the next one from the lexer.
static inline void parser_step(Parser* parser)
{
    Lexer* lexer = &parser->lexer;
    const Tok next = lexer_next(lexer);
    parser->tok = next;
}
// True if the current token is of kind `ty`; consumes nothing.
static inline bool parser_test(const Parser* parser, TokTy ty)
{
    const TokTy current = parser->tok.ty;
    return current == ty;
}
// Consumes the current token if it is of kind `ty`: remembers it in
// `parser->eaten`, advances, and returns true. Otherwise leaves the parser
// untouched and returns false.
static inline bool parser_eat(Parser* parser, TokTy ty)
{
    if (!parser_test(parser, ty)) {
        return false;
    }
    parser->eaten = parser->tok;
    parser_step(parser);
    return true;
}
// Returns a newly allocated, NUL-terminated copy of the source text that
// `tok` spans. The caller frees it.
static inline char* parser_ident_val(const Parser* parser, Tok tok)
{
    const char* start = &parser->lexer.text[tok.loc.idx];
    const size_t length = tok.len;
    return asm_strndup(start, length);
}
// Reports a parse error at `loc` on stderr and marks the parser as failed.
static inline void parser_report(Parser* parser, const char* msg, Loc loc)
{
    const Lexer* source = &parser->lexer;
    REPORTF_ERROR("%s", msg);
    print_report_loc(source->filename, source->text, source->text_len, loc);
    parser->error_occured = true;
}
// Consumes any run of newline tokens at the current position.
static inline void parser_skip_newlines(Parser* parser)
{
    for (;;) {
        if (!parser_eat(parser, TT_Newline)) {
            break;
        }
    }
}
// Parses the labels in front of an instruction and the instruction's
// mnemonic.
//
// Accepts any number of "name:" and ".name:" labels (newlines between them
// are skipped) and stops at the first identifier that is not followed by
// ':'; that identifier is handed back through `*ident` (caller frees) with
// its position in `*ident_loc`.
//
// Returns the collected labels, most recent first, or NULL if there are
// none. On a syntax error the error is reported, already collected labels
// are freed, `*ident` is left NULL and NULL is returned. `*ident` is also
// NULL if the input ends before a mnemonic is found.
static inline PLabel* parser_parse_labels(
    Parser* parser, char** ident, Loc* ident_loc)
{
    PLabel* chain = NULL;
    *ident = NULL;
    for (;;) {
        if (parser->tok.ty == TT_Eof || *ident != NULL) {
            break;
        }
        parser_skip_newlines(parser);
        const Loc start = parser->tok.loc;

        // ".name:" sub-label
        if (parser_eat(parser, '.')) {
            if (!parser_eat(parser, TT_Ident)) {
                parser_report(parser, "expected identifier", parser->tok.loc);
                plabel_free(chain);
                return NULL;
            }
            char* sub_name = parser_ident_val(parser, parser->eaten);
            if (!parser_eat(parser, ':')) {
                parser_report(parser, "expected ':'", parser->tok.loc);
                plabel_free(chain);
                free(sub_name);
                return NULL;
            }
            chain = plabel_new(chain, sub_name, true, start);
            continue;
        }

        if (!parser_eat(parser, TT_Ident)) {
            parser_report(
                parser, "expected identifier or ':'", parser->tok.loc);
            plabel_free(chain);
            return NULL;
        }

        // Either "name:" (a label) or the mnemonic that ends the label run.
        char* name = parser_ident_val(parser, parser->eaten);
        *ident_loc = start;
        if (parser_eat(parser, ':')) {
            chain = plabel_new(chain, name, false, start);
        } else {
            *ident = name;
        }
    }
    return chain;
}
// Decodes the character at the start of `str` as it appears inside a
// character or string literal. A backslash introduces an escape: "\0" is
// NUL, "\t" is tab, "\n" is newline, and any other escaped character stands
// for itself. Without a backslash the first character is returned as is.
static inline char literal_char_val(const char* str)
{
    if (str[0] != '\\') {
        return str[0];
    }
    const char code = str[1];
    if (code == '0') {
        return 0;
    }
    if (code == 't') {
        return '\t';
    }
    if (code == 'n') {
        return '\n';
    }
    return code;
}
// Precedence level passed to parser_parse_operand_2 to parse a full
// expression; it equals the largest value in that function's precedence
// table (the one assigned to '|').
static const int parser_binary_prec = 6;
// Forward declaration: parser_parse_operand_0 needs it for parenthesized
// sub-expressions.
static inline POperand* parser_parse_operand_2(Parser* parser, int prec);
static inline POperand* parser_parse_operand_0(Parser* parser)
{
Loc loc = parser->tok.loc;
if (parser_eat(parser, TT_Ident)) {
char* ident = parser_ident_val(parser, parser->eaten);
const char* reg_key[10] = {
"r0", "r1", "r2", "r3", "r4", "rbp", "rsp", "rfl", "rcs", "rip"
};
Reg reg_val[10] = { R0, R1, R2, R3, R4, Rbp, Rsp, Rfl, Rcs, Rip };
for (size_t i = 0; i < 10; ++i) {
if (strcmp(reg_key[i], ident) == 0) {
free(ident);
return poperand_new_reg(reg_val[i], loc);
}
}
return poperand_new_str(PoTy_Ident, ident, parser->eaten.len, loc);
} else if (parser_eat(parser, TT_Int)) {
char* str = parser_ident_val(parser, parser->eaten);
uint64_t val = strtoull(str, NULL, 10);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return NULL;
}
uint16_t imm = (uint16_t)val;
return poperand_new_imm(imm, loc);
} else if (parser_eat(parser, TT_Binary)) {
char* str = parser_ident_val(parser, parser->eaten);
uint64_t val = strtoull(&str[2], NULL, 2);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return NULL;
}
uint16_t imm = (uint16_t)val;
return poperand_new_imm(imm, loc);
} else if (parser_eat(parser, TT_Hex)) {
char* str = parser_ident_val(parser, parser->eaten);
uint64_t val = strtoull(&str[2], NULL, 16);
free(str);
if (val > 0xffff) {
parser_report(parser,
"integers larger than 65536 not supported",
parser->tok.loc);
return NULL;
}
uint16_t imm = (uint16_t)val;
return poperand_new_imm(imm, loc);
} else if (parser_eat(parser, TT_Char)) {
char* str = parser_ident_val(parser, parser->eaten);
uint16_t imm = (uint16_t)literal_char_val(&str[1]);
free(str);
return poperand_new_imm(imm, loc);
} else if (parser_eat(parser, TT_Str)) {
char* lit = parser_ident_val(parser, parser->eaten);
size_t lit_len = strlen(lit);
char* str = calloc(lit_len - 1, sizeof(char));
size_t str_len = 0;
for (size_t i = 1; i < lit_len - 2; ++i) {
str[i] = literal_char_val(&lit[i]);
}
free(lit);
return poperand_new_str(PoTy_Str, str, str_len, loc);
} else if (parser_eat(parser, '.')) {
if (!parser_eat(parser, TT_Ident)) {
parser_report(parser, "expected identifier", parser->tok.loc);
return NULL;
}
char* ident = parser_ident_val(parser, parser->eaten);
return poperand_new_str(PoTy_SubLabel, ident, parser->eaten.len, loc);
} else if (parser_eat(parser, '(')) {
POperand* operand = parser_parse_operand_2(parser, parser_binary_prec);
if (!parser_eat(parser, ')')) {
parser_report(parser, "expected ')'", parser->tok.loc);
poperand_free(operand);
return NULL;
}
return operand;
} else {
parser_report(parser, "expected operand", parser->tok.loc);
return NULL;
}
}
// Parses a unary expression: any number of prefix '-' (negate) and '!'
// (bitwise not) operators applied to a primary operand.
// Returns NULL if the operand underneath failed to parse, so that no node
// with a NULL child is ever built.
static inline POperand* parser_parse_operand_1(Parser* parser)
{
    Loc loc = parser->tok.loc;
    POperandTy ty;
    if (parser_eat(parser, '-')) {
        ty = PoTy_Negate;
    } else if (parser_eat(parser, '!')) {
        ty = PoTy_Not;
    } else {
        return parser_parse_operand_0(parser);
    }
    POperand* inner = parser_parse_operand_1(parser);
    if (inner == NULL) {
        return NULL;
    }
    return poperand_new_unary(ty, inner, loc);
}
// Parses a left-associative binary expression by precedence climbing.
//
// `prec` is the loosest precedence level to accept; level 0 means "unary
// expression only". Levels, from tightest to loosest:
//   1: * / %    2: + -    3: >> <<    4: &    5: ^    6: |
//
// Returns a newly allocated tree owned by the caller, or NULL if any
// sub-expression failed to parse (the error has been reported by then and
// partial results are freed).
static inline POperand* parser_parse_operand_2(Parser* parser, int prec)
{
    const POperandTy op_tys[] = {
        PoTy_Or,
        PoTy_Xor,
        PoTy_And,
        PoTy_Shr,
        PoTy_Shl,
        PoTy_Add,
        PoTy_Sub,
        PoTy_Mul,
        PoTy_Div,
        PoTy_Mod,
    };
    const TokTy op_tts[] = {
        '|',
        '^',
        '&',
        TT_DoubleGt,
        TT_DoubleLt,
        '+',
        '-',
        '*',
        '/',
        '%',
    };
    const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 };
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_tts) / sizeof(op_tts[0]),
        "misaligned");
    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
            == sizeof(op_precs) / sizeof(op_precs[0]),
        "misaligned");
    if (prec == 0) {
        return parser_parse_operand_1(parser);
    }
    POperand* left = parser_parse_operand_2(parser, prec - 1);
    if (left == NULL) {
        // Without a left operand there is nothing to attach operators to.
        return NULL;
    }
    bool should_continue = true;
    while (should_continue) {
        should_continue = false;
        for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) {
            if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) {
                POperand* right = parser_parse_operand_2(parser, prec - 1);
                if (right == NULL) {
                    poperand_free(left);
                    return NULL;
                }
                left = poperand_new_binary(op_tys[i], left, right, left->loc);
                should_continue = true;
                break;
            }
        }
    }
    return left;
}
static inline POperand* parser_parse_operand_3(Parser* parser)
{
Loc loc = parser->tok.loc;
if (parser_eat(parser, TT_LBracket)) {
parser_report(parser, "expected 'u8' or 'u16' before '['", loc);
return NULL;
}
if (!parser_test(parser, TT_Ident)) {
return parser_parse_operand_2(parser, parser_binary_prec);
}
char* ident = parser_ident_val(parser, parser->tok);
if (strcmp(ident, "u8") == 0) {
free(ident);
parser_step(parser);
if (!parser_eat(parser, '[')) {
parser_report(parser, "expected '['", parser->tok.loc);
return NULL;
}
POperand* operand = parser_parse_operand_2(parser, parser_binary_prec);
if (!parser_eat(parser, ']')) {
parser_report(parser, "expected ']'", parser->tok.loc);
poperand_free(operand);
return NULL;
}
return poperand_new_unary(PoTy_Mem8, operand, loc);
} else if (strcmp(ident, "u16") == 0) {
free(ident);
parser_step(parser);
if (!parser_eat(parser, '[')) {
parser_report(parser, "expected '['", parser->tok.loc);
return NULL;
}
POperand* operand = parser_parse_operand_2(parser, parser_binary_prec);
if (!parser_eat(parser, ']')) {
parser_report(parser, "expected ']'", parser->tok.loc);
poperand_free(operand);
return NULL;
}
return poperand_new_unary(PoTy_Mem16, operand, loc);
} else {
free(ident);
return parser_parse_operand_2(parser, parser_binary_prec);
}
}
// Error recovery: discards tokens up to and including the next newline, or
// up to the end of input.
static inline void parser_skip_to_next_line(Parser* parser)
{
    for (;;) {
        if (parser_done(parser)) {
            return;
        }
        if (parser_eat(parser, TT_Newline)) {
            return;
        }
        parser_step(parser);
    }
}
// Parses the next source line: leading labels, the mnemonic, and a
// comma-separated operand list, followed by a newline or end of input.
// Blank lines after the parsed line are skipped.
//
// Returns a newly allocated PLine that owns its labels, mnemonic and
// operands (release it with pline_free), or NULL if an operand or the line
// ending was malformed; in that case the error has been reported and
// everything parsed so far is freed.
//
// NOTE(review): if parser_parse_labels finds no mnemonic (label syntax
// error, or input ending after labels) the returned line has `op == NULL`,
// exactly as before; callers have to cope with that.
PLine* parser_next(Parser* parser)
{
    // Defined values even when parser_parse_labels finds no mnemonic.
    char* ident = NULL;
    Loc loc = { 0 };
    PLabel* labels = parser_parse_labels(parser, &ident, &loc);

    // Scratch storage for the operands. A fixed array of pointers avoids
    // the heap allocation (and its failure path) altogether.
    enum { max_ops_size = 64 };
    POperand* ops[max_ops_size];
    size_t ops_size = 0;

    if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
        POperand* first = parser_parse_operand_3(parser);
        if (!first) {
            parser_skip_to_next_line(parser);
            goto error_free_ops;
        }
        ops[ops_size++] = first;
        // NOTE(review): the `ops_size < 3` bound stops after three operands,
        // which makes the 64-operand check below unreachable and turns a
        // fourth operand into "expected newline". Kept as is — confirm
        // whether data directives are meant to take more.
        while (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')
            && ops_size < 3) {
            if (ops_size >= (size_t)max_ops_size) {
                parser_report(parser,
                    "exceeded maximum number of operands (64)",
                    parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            if (!parser_eat(parser, ',')) {
                parser_report(parser, "expected ','", parser->tok.loc);
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            POperand* next = parser_parse_operand_3(parser);
            if (!next) {
                parser_skip_to_next_line(parser);
                goto error_free_ops;
            }
            ops[ops_size++] = next;
        }
    }
    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
        parser_report(parser, "expected newline", parser->tok.loc);
        goto error_free_ops;
    }
    parser_skip_newlines(parser);
    return pline_new(ident, labels, loc, ops_size, ops);

error_free_ops:
    for (size_t i = 0; i < ops_size; ++i) {
        poperand_free(ops[i]);
    }
    plabel_free(labels);
    free(ident);
    return NULL;
}
// Kind of an identifier table entry.
typedef enum {
// Unused slot; ident_resolver_first_empty hands these out again.
IdentResolTy_None,
IdentResolTy_Label,
IdentResolTy_SubLabel,
} IdentResolTy;
// One entry of the identifier table: what a name resolves to.
typedef struct IdentResol IdentResol;
struct IdentResol {
// The name; owned by the entry (freed in ident_resol_destroy).
char* ident;
// Where the name was defined.
Loc loc;
// For sub-labels: the label that was current when the sub-label was
// defined. Not set for plain labels.
const IdentResol* parent;
IdentResolTy ty;
union {
// Address value stored by the define functions (asm_ip * 2).
uint16_t ip;
};
};
// Releases what a table entry owns: the name of label and sub-label
// entries. Unused entries own nothing. The entry itself is not freed.
void ident_resol_destroy(IdentResol* resol)
{
    const IdentResolTy ty = resol->ty;
    if (ty == IdentResolTy_Label || ty == IdentResolTy_SubLabel) {
        free(resol->ident);
    }
}
// Table of defined identifiers, stored as a growable array.
typedef struct IdentResolver IdentResolver;
struct IdentResolver {
// Heap-allocated entry array.
IdentResol* resols;
// Number of entries `resols` has room for.
size_t resols_capacity;
// Number of entries in use (including any IdentResolTy_None slots).
size_t resols_size;
// The most recently defined (or re-entered, see use_labels) label;
// sub-labels are matched against it. Points into `resols`.
const IdentResol* current_parent;
};
// Initializes an empty identifier table with room for 512 entries and no
// current parent label.
void ident_resolver_construct(IdentResolver* resolver)
{
    const size_t initial_capacity = 512;
    resolver->resols = malloc(sizeof(IdentResol) * initial_capacity);
    resolver->resols_capacity = initial_capacity;
    resolver->resols_size = 0;
    resolver->current_parent = NULL;
}
// Releases every entry's name and then the table storage itself.
// The IdentResolver struct is not freed.
void ident_resolver_destroy(IdentResolver* resolver)
{
    IdentResol* entry = resolver->resols;
    IdentResol* const end = entry + resolver->resols_size;
    for (; entry != end; ++entry) {
        ident_resol_destroy(entry);
    }
    free(resolver->resols);
}
// Returns the index of a slot the caller may fill: the first unused
// (IdentResolTy_None) entry if there is one, otherwise a fresh entry
// appended at the end, growing the array when it is full.
//
// Entries hold pointers into the array (`parent`, and the resolver's
// `current_parent`). Growing therefore copies into a new allocation and
// rebases those pointers before the old storage is released; a plain
// realloc could move the array and leave them dangling.
// Terminates the process if memory cannot be obtained.
static inline size_t ident_resolver_first_empty(IdentResolver* resolver)
{
    for (size_t i = 0; i < resolver->resols_size; ++i) {
        if (resolver->resols[i].ty == IdentResolTy_None) {
            return i;
        }
    }

    if (resolver->resols_size + 1 > resolver->resols_capacity) {
        IdentResol* old = resolver->resols;
        size_t new_capacity = resolver->resols_capacity > 0
            ? resolver->resols_capacity * 2
            : 512;
        IdentResol* grown = malloc(sizeof *grown * new_capacity);
        if (grown == NULL) {
            fprintf(stderr, "out of memory\n");
            exit(1);
        }
        for (size_t k = 0; k < resolver->resols_size; ++k) {
            grown[k] = old[k];
            // Only sub-labels carry a meaningful parent pointer.
            if (grown[k].ty == IdentResolTy_SubLabel
                && grown[k].parent != NULL) {
                grown[k].parent = grown + (grown[k].parent - old);
            }
        }
        if (resolver->current_parent != NULL) {
            resolver->current_parent
                = grown + (resolver->current_parent - old);
        }
        free(old);
        resolver->resols = grown;
        resolver->resols_capacity = new_capacity;
    }

    size_t appended = resolver->resols_size;
    resolver->resols_size += 1;
    return appended;
}
// Records a top-level label and makes it the current parent for the
// sub-labels that follow. Takes ownership of `ident`. The stored address is
// `asm_ip * 2`, truncated to 16 bits.
void ident_resolver_define_label(
    IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip)
{
    const size_t slot = ident_resolver_first_empty(resolver);
    IdentResol* entry = &resolver->resols[slot];
    entry->ident = ident;
    entry->loc = loc;
    entry->parent = NULL;
    entry->ty = IdentResolTy_Label;
    entry->ip = (uint16_t)(asm_ip * 2);
    resolver->current_parent = entry;
}
// Records a sub-label belonging to the resolver's current parent label.
// Takes ownership of `ident`. The stored address is `asm_ip * 2`, truncated
// to 16 bits. The current parent is left unchanged.
void ident_resolver_define_sublabel(
    IdentResolver* resolver, char* ident, Loc loc, uint16_t asm_ip)
{
    const size_t slot = ident_resolver_first_empty(resolver);
    IdentResol* entry = &resolver->resols[slot];
    entry->ident = ident;
    entry->loc = loc;
    entry->parent = resolver->current_parent;
    entry->ty = IdentResolTy_SubLabel;
    entry->ip = (uint16_t)(asm_ip * 2);
}
// Looks up `ident`, searching from the most recently added entry backwards.
// Labels match by name alone; sub-labels additionally have to belong to the
// resolver's current parent label.
// Returns the matching entry, or NULL if there is none.
const IdentResol* ident_resolver_resolve(
    const IdentResolver* resolver, const char* ident)
{
    size_t remaining = resolver->resols_size;
    while (remaining-- > 0) {
        IdentResol* candidate = &resolver->resols[remaining];
        if (candidate->ty == IdentResolTy_None) {
            continue;
        }
        if (strcmp(candidate->ident, ident) != 0) {
            continue;
        }
        if (candidate->ty == IdentResolTy_SubLabel
            && candidate->parent != resolver->current_parent) {
            continue;
        }
        return candidate;
    }
    return NULL;
}
// Defines every label of the list at address `asm_ip`. The list is stored
// most-recent-first, so the tail is handled before the head to define the
// labels in source order.
// Returns 0 on success. A name that already resolves is reported together
// with its original definition, is not defined again, and makes the call
// return 1. Otherwise the result of the tail call is passed through.
static inline int define_labels(
    IdentResolver* resolver, PLabel* label, uint16_t asm_ip, Reporter* rep)
{
    if (!label) {
        return 0;
    }
    const int tail_result = define_labels(resolver, label->next, asm_ip, rep);

    const IdentResol* previous
        = ident_resolver_resolve(resolver, label->ident);
    if (previous) {
        REPORTF_ERROR("redefinition of identifier '%s'", label->ident);
        reporter_print_loc(rep, label->loc);
        REPORTF_INFO("original definition of '%s'", label->ident);
        reporter_print_loc(rep, previous->loc);
        return 1;
    }

    // The table keeps its own copy of the name.
    char* name = asm_strdup(label->ident);
    if (label->sub_label) {
        ident_resolver_define_sublabel(resolver, name, label->loc, asm_ip);
    } else {
        ident_resolver_define_label(resolver, name, label->loc, asm_ip);
    }
    return tail_result;
}
// Re-enters the scope of already defined labels: walks the list in source
// order (tail first) and makes each top-level label the resolver's current
// parent, so that sub-label lookups that follow match the right label.
// Labels that do not resolve (e.g. because defining them failed earlier)
// are skipped instead of dereferencing a NULL lookup result.
static inline void use_labels(IdentResolver* resolver, PLabel* label)
{
    if (label == NULL)
        return;
    use_labels(resolver, label->next);
    const IdentResol* existing = ident_resolver_resolve(resolver, label->ident);
    if (existing != NULL && existing->ty == IdentResolTy_Label) {
        resolver->current_parent = existing;
    }
}
// Kind of an evaluated operand.
typedef enum {
// Evaluation failed.
EoTy_Err,
EoTy_Reg,
EoTy_Imm,
// String operand; the text stays in the POperand it came from.
EoTy_Str,
// Indirections produced for u8[...] operands.
EoTy_Mem8Reg,
EoTy_Mem8Imm,
// Indirections produced for u16[...] operands (eval_operand derives them
// from the Mem8 kinds).
EoTy_MemU16Reg,
EoTy_MemU16Imm,
} EvaledOperandTy;
// Result of evaluating a parsed operand.
typedef struct {
EvaledOperandTy ty;
union {
// Set for EoTy_Reg and the register-based indirections.
Reg reg;
// Set for EoTy_Imm and the immediate-based indirections.
uint16_t imm;
};
// Displacement added to `reg` for register-based indirections.
uint16_t offset;
} EvaledOperand;
// Context needed to evaluate operands.
typedef struct {
// Identifier table used to resolve labels.
IdentResolver* re;
// Where diagnostics are printed.
Reporter* rep;
// When false, an unresolved identifier evaluates to the immediate 0
// instead of being reported as an error.
bool unresolve_is_error;
} OperandEvaluator;
// Applies the unary operator `ty` to a 16-bit value: bitwise complement
// for PoTy_Not, two's-complement negation for PoTy_Negate. Any other kind
// yields 0.
static inline uint16_t eval_poperandty_unary(POperandTy ty, uint16_t operand)
{
    if (ty == PoTy_Not) {
        return (uint16_t)~operand;
    }
    if (ty == PoTy_Negate) {
        return (uint16_t)-(int16_t)operand;
    }
    return 0;
}
// Applies the binary operator `ty` to two 16-bit values and returns the
// result truncated to 16 bits.
//
// Add, Sub, Mul, Div and Mod operate on the operands reinterpreted as
// signed 16-bit integers. Cases that would be undefined in C are given a
// fixed result of 0 so that bad assembly input cannot crash the assembler:
//   - shifts by 16 or more bits
//   - division or modulo by zero (there is no error channel here; callers
//     that want a diagnostic have to check the divisor themselves)
// Kinds that are not binary operators yield 0.
static inline uint16_t eval_poperandty_binary(
    POperandTy ty, uint16_t left, uint16_t right)
{
    switch (ty) {
        case PoTy_Or:
            return left | right;
        case PoTy_Xor:
            return left ^ right;
        case PoTy_And:
            return left & right;
        case PoTy_Shl:
            // Shift in an unsigned 32-bit type: `left` would otherwise be
            // promoted to int, where shifting into the sign bit is undefined.
            return right < 16 ? (uint16_t)((uint32_t)left << right) : 0;
        case PoTy_Shr:
            return right < 16 ? (uint16_t)(left >> right) : 0;
        case PoTy_Add:
            return (uint16_t)((int16_t)left + (int16_t)right);
        case PoTy_Sub:
            return (uint16_t)((int16_t)left - (int16_t)right);
        case PoTy_Mul:
            return (uint16_t)((int16_t)left * (int16_t)right);
        case PoTy_Div:
            if (right == 0) {
                return 0;
            }
            return (uint16_t)((int16_t)left / (int16_t)right);
        case PoTy_Mod:
            if (right == 0) {
                return 0;
            }
            return (uint16_t)((int16_t)left % (int16_t)right);
        default:
            return 0;
    }
}
// Evaluates a constant expression to a 16-bit immediate.
//
// Literals evaluate to themselves, identifiers and sub-labels to the
// address recorded in the identifier table, and operators are applied to
// their evaluated operands. Strings, registers and indirections are not
// allowed inside expressions.
//
// Returns an EoTy_Imm result, or EoTy_Err after printing a diagnostic.
// While `evaluator->unresolve_is_error` is false, unresolved identifiers
// evaluate to 0 without an error; in the same mode a zero divisor (which
// may just be such a placeholder) makes the division evaluate to 0 instead
// of being reported.
EvaledOperand eval_operand_to_imm(
    OperandEvaluator* evaluator, POperand* operand)
{
    switch (operand->ty) {
        case PoTy_Str:
            REPORTF_ERROR("%s", "strings cannot be part of expressions");
            reporter_print_loc(evaluator->rep, operand->loc);
            return (EvaledOperand) { .ty = EoTy_Err };
        case PoTy_Mem8:
        case PoTy_Mem16:
            REPORTF_ERROR("%s", "indirections cannot be part of expressions");
            reporter_print_loc(evaluator->rep, operand->loc);
            return (EvaledOperand) { .ty = EoTy_Err };
        case PoTy_Reg:
            REPORTF_ERROR("%s", "registers cannot be part of expressions");
            reporter_print_loc(evaluator->rep, operand->loc);
            return (EvaledOperand) { .ty = EoTy_Err };
        case PoTy_Imm:
            return (EvaledOperand) { .ty = EoTy_Imm, .imm = operand->imm };
        case PoTy_Ident:
        case PoTy_SubLabel: {
            const IdentResol* re
                = ident_resolver_resolve(evaluator->re, operand->str);
            if (re == NULL) {
                if (!evaluator->unresolve_is_error) {
                    return (EvaledOperand) { .ty = EoTy_Imm, .imm = 0 };
                }
                REPORTF_ERROR("undefined identifier '%s'", operand->str);
                reporter_print_loc(evaluator->rep, operand->loc);
                return (EvaledOperand) { .ty = EoTy_Err };
            }
            switch (re->ty) {
                case IdentResolTy_None:
                    break;
                case IdentResolTy_Label:
                case IdentResolTy_SubLabel:
                    return (EvaledOperand) { .ty = EoTy_Imm, .imm = re->ip };
            }
            fprintf(stderr, "unreachable\n");
            exit(1);
        }
        case PoTy_Not:
        case PoTy_Negate: {
            EvaledOperand inner
                = eval_operand_to_imm(evaluator, operand->operand);
            if (inner.ty == EoTy_Err) {
                return inner;
            } else if (inner.ty != EoTy_Imm) {
                REPORTF_ERROR("%s", "operand cannot be used in expressions");
                reporter_print_loc(evaluator->rep, operand->operand->loc);
                return (EvaledOperand) { .ty = EoTy_Err };
            }
            return (EvaledOperand) {
                .ty = EoTy_Imm,
                .imm = eval_poperandty_unary(operand->ty, inner.imm),
            };
        }
        case PoTy_Or:
        case PoTy_Xor:
        case PoTy_And:
        case PoTy_Shl:
        case PoTy_Shr:
        case PoTy_Add:
        case PoTy_Sub:
        case PoTy_Mul:
        case PoTy_Div:
        case PoTy_Mod: {
            EvaledOperand left = eval_operand_to_imm(evaluator, operand->left);
            if (left.ty == EoTy_Err) {
                return left;
            } else if (left.ty != EoTy_Imm) {
                REPORTF_ERROR("%s", "operand cannot be used in expressions");
                reporter_print_loc(evaluator->rep, operand->left->loc);
                return (EvaledOperand) { .ty = EoTy_Err };
            }
            EvaledOperand right
                = eval_operand_to_imm(evaluator, operand->right);
            if (right.ty == EoTy_Err) {
                return right;
            } else if (right.ty != EoTy_Imm) {
                REPORTF_ERROR("%s", "operand cannot be used in expressions");
                reporter_print_loc(evaluator->rep, operand->right->loc);
                return (EvaledOperand) { .ty = EoTy_Err };
            }
            // Dividing by zero is undefined in C; catch it here, where a
            // diagnostic with a source location can be produced.
            if ((operand->ty == PoTy_Div || operand->ty == PoTy_Mod)
                && right.imm == 0) {
                if (!evaluator->unresolve_is_error) {
                    return (EvaledOperand) { .ty = EoTy_Imm, .imm = 0 };
                }
                REPORTF_ERROR("%s", "division by zero in expression");
                reporter_print_loc(evaluator->rep, operand->right->loc);
                return (EvaledOperand) { .ty = EoTy_Err };
            }
            return (EvaledOperand) {
                .ty = EoTy_Imm,
                .imm = eval_poperandty_binary(operand->ty, left.imm, right.imm),
            };
        }
    }
    fprintf(stderr, "unreachable\n");
    exit(1);
}
EvaledOperand eval_operand_indirection_expr(
OperandEvaluator* evaluator, POperand* operand)
{
switch (operand->ty) {
case PoTy_Reg:
return (EvaledOperand) {
.ty = EoTy_Mem8Reg,
.reg = operand->reg,
.offset = 0,
};
case PoTy_Str:
REPORTF_ERROR("%s", "strings cannot be part of indirections");
reporter_print_loc(evaluator->rep, operand->loc);
return (EvaledOperand) { .ty = EoTy_Err };
case PoTy_Mem8:
case PoTy_Mem16:
REPORTF_ERROR("%s", "indirections cannot be part of indirections");
reporter_print_loc(evaluator->rep, operand->loc);
return (EvaledOperand) { .ty = EoTy_Err };
case PoTy_Imm:
case PoTy_Ident:
case PoTy_SubLabel:
case PoTy_Not:
case PoTy_Negate:
case PoTy_Or:
case PoTy_Xor:
case PoTy_And:
case PoTy_Shl:
case PoTy_Shr:
case PoTy_Mul:
case PoTy_Div:
case PoTy_Mod: {
EvaledOperand evaled = eval_operand_to_imm(evaluator, operand);
if (evaled.ty == EoTy_Err) {
return evaled;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.imm = evaled.imm,
};
}
case PoTy_Add: {
if (operand->left->ty == PoTy_Reg) {
EvaledOperand right
= eval_operand_to_imm(evaluator, operand->right);
if (right.ty == EoTy_Err) {
return right;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.reg = operand->left->reg,
.offset = right.imm,
};
} else if (operand->right->ty == PoTy_Reg) {
EvaledOperand left
= eval_operand_to_imm(evaluator, operand->left);
if (left.ty == EoTy_Err) {
return left;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.reg = operand->right->reg,
.offset = left.imm,
};
} else {
EvaledOperand evaled = eval_operand_to_imm(evaluator, operand);
if (evaled.ty == EoTy_Err) {
return evaled;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.imm = evaled.imm,
};
}
break;
}
case PoTy_Sub: {
if (operand->left->ty == PoTy_Reg) {
EvaledOperand right
= eval_operand_to_imm(evaluator, operand->right);
if (right.ty == EoTy_Err) {
return right;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.reg = operand->left->reg,
.offset = (uint16_t)-(int16_t)right.imm,
};
} else if (operand->right->ty == PoTy_Reg) {
EvaledOperand left
= eval_operand_to_imm(evaluator, operand->left);
if (left.ty == EoTy_Err) {
return left;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.reg = operand->right->reg,
.offset = (uint16_t)-(int16_t)left.imm,
};
} else {
EvaledOperand evaled = eval_operand_to_imm(evaluator, operand);
if (evaled.ty == EoTy_Err) {
return evaled;
}
return (EvaledOperand) {
.ty = EoTy_Mem8Imm,
.imm = evaled.imm,
};
}
break;
}
}
fprintf(stderr, "unreachable\n");
exit(1);
}
// Evaluates a complete instruction operand.
//
// Strings yield EoTy_Str (the text stays in `operand`), registers yield
// EoTy_Reg, u8[...] / u16[...] yield the matching indirection kind, and
// everything else is evaluated as a constant expression. Failures are
// returned as EoTy_Err by the evaluation helpers.
EvaledOperand eval_operand(OperandEvaluator* evaluator, POperand* operand)
{
    const POperandTy ty = operand->ty;

    if (ty == PoTy_Str) {
        return (EvaledOperand) { .ty = EoTy_Str };
    }
    if (ty == PoTy_Reg) {
        return (EvaledOperand) { .ty = EoTy_Reg, .reg = operand->reg };
    }
    if (ty == PoTy_Mem8 || ty == PoTy_Mem16) {
        EvaledOperand result
            = eval_operand_indirection_expr(evaluator, operand->operand);
        if (ty == PoTy_Mem16) {
            // The helper always reports 8-bit kinds; widen them here.
            if (result.ty == EoTy_Mem8Reg) {
                result.ty = EoTy_MemU16Reg;
            } else if (result.ty == EoTy_Mem8Imm) {
                result.ty = EoTy_MemU16Imm;
            }
        }
        return result;
    }

    switch (ty) {
        case PoTy_Imm:
        case PoTy_Ident:
        case PoTy_SubLabel:
        case PoTy_Not:
        case PoTy_Negate:
        case PoTy_Or:
        case PoTy_Xor:
        case PoTy_And:
        case PoTy_Shl:
        case PoTy_Shr:
        case PoTy_Add:
        case PoTy_Sub:
        case PoTy_Mul:
        case PoTy_Div:
        case PoTy_Mod:
            return eval_operand_to_imm(evaluator, operand);
        default:
            break;
    }
    fprintf(stderr, "unreachable\n");
    exit(1);
}
// Instruction and directive mnemonics. The numeric value of each entry is
// its index into mnemonic_str, so both lists must stay in the same order.
// M_err (value 0) doubles as "not recognized" in pline_assemble.
typedef enum {
// clang-format off
M_err, M_d8, M_d16, M_nop, M_hlt, M_jmp,
M_jmpf, M_jnz, M_cmp, M_mov, M_in, M_out,
M_call, M_callf, M_ret, M_retf, M_lit, M_int,
M_iret, M_or, M_xor, M_and, M_shl, M_rshl,
M_shr, M_rshr, M_add, M_sub, M_rsub, M_mul,
M_imul, M_div, M_idiv, M_rdiv, M_ridiv, M_mod,
M_rmod, M_push, M_pop
// clang-format on
} Mnemonic;
// Source spelling of every Mnemonic, indexed by the enum value.
// pline_assemble searches this table to turn a line's mnemonic text into a
// Mnemonic, so the order has to match the enum exactly.
const char* mnemonic_str[] = {
// clang-format off
"err", "d8", "d16", "nop", "hlt", "jmp",
"jmpf", "jnz", "cmp", "mov", "in", "out",
"call", "callf", "ret", "retf", "lit", "int",
"iret", "or", "xor", "and", "shl", "rshl",
"shr", "rshr", "add", "sub", "rsub", "mul",
"imul", "div", "idiv", "rdiv", "ridiv", "mod",
"rmod", "push", "pop"
// clang-format on
};
/*
 * Assembles one parsed source line into `object`.
 *
 * The mnemonic in `line->op` is looked up in `mnemonic_str`, the operands are
 * evaluated with `eval_operand`, and the matching instruction(s) are emitted
 * through `assemble_line`.
 *
 * Returns the sum of the `assemble_line` results for everything emitted.
 * Returns 0 when
 *  - the mnemonic is unknown (reported on stderr),
 *  - an operand evaluates to EoTy_Err (no additional report is made here),
 *  - the operand count or operand kinds do not fit the mnemonic
 *    ("malformed instruction"), or a data directive is given invalid or too
 *    much data.
 * A `d8`/`d16` line without operands also yields 0.
 *
 * NOTE(review): every emitted word/instruction is written through the same
 * `object` pointer, exactly as before this change. Confirm whether
 * assemble_line expects the caller to advance the pointer for multi-word
 * output (d8, d16, push, pop).
 */
static inline uint16_t pline_assemble(OperandEvaluator* evaluator,
    uint16_t* object,
    const PLine* line,
    Reporter* rep)
{
// Leaves pline_assemble with 0 when an operand failed to evaluate.
#define CHECK_OPERAND(OP) \
    do { \
        if ((OP).ty == EoTy_Err) { \
            return 0; \
        } \
    } while (0)

// Emits exactly one instruction and returns what assemble_line reports.
#define ASSEMBLE_ONE(RVAL) \
    do { \
        Line l = (RVAL); \
        return assemble_line(object, &l); \
    } while (0)

// Declares VAR as evaluated operand number IDX; bails out on failure.
#define EVAL_OPERAND(VAR, IDX) \
    EvaledOperand VAR = eval_operand(evaluator, line->ops[IDX]); \
    CHECK_OPERAND(VAR)

// `NAME` -- takes no operands.
#define CASE_NO_OPERANDS(NAME) \
    case M_##NAME: \
        if (line->ops_size == 0) \
            ASSEMBLE_ONE(s_##NAME()); \
        break

// `NAME reg|imm`
#define CASE_TARGET(NAME) \
    case M_##NAME: \
        if (line->ops_size == 1) { \
            EVAL_OPERAND(op1, 0); \
            if (op1.ty == EoTy_Reg) \
                ASSEMBLE_ONE(s_##NAME##_r(op1.reg)); \
            if (op1.ty == EoTy_Imm) \
                ASSEMBLE_ONE(s_##NAME##_i(op1.imm)); \
        } \
        break

// `NAME reg, reg|imm`
#define CASE_REG_SRC(NAME) \
    case M_##NAME: \
        if (line->ops_size == 2) { \
            EVAL_OPERAND(op1, 0); \
            EVAL_OPERAND(op2, 1); \
            if (op1.ty == EoTy_Reg) { \
                if (op2.ty == EoTy_Reg) \
                    ASSEMBLE_ONE(s_##NAME##_r(op1.reg, op2.reg)); \
                if (op2.ty == EoTy_Imm) \
                    ASSEMBLE_ONE(s_##NAME##_i(op1.reg, op2.imm)); \
            } \
        } \
        break

// `NAME reg|imm, reg|imm`
#define CASE_SRC_SRC(NAME) \
    case M_##NAME: \
        if (line->ops_size == 2) { \
            EVAL_OPERAND(op1, 0); \
            EVAL_OPERAND(op2, 1); \
            if (op1.ty == EoTy_Reg) { \
                if (op2.ty == EoTy_Reg) \
                    ASSEMBLE_ONE(s_##NAME##_r_r(op1.reg, op2.reg)); \
                if (op2.ty == EoTy_Imm) \
                    ASSEMBLE_ONE(s_##NAME##_r_i(op1.reg, op2.imm)); \
            } \
            if (op1.ty == EoTy_Imm) { \
                if (op2.ty == EoTy_Reg) \
                    ASSEMBLE_ONE(s_##NAME##_i_r(op1.imm, op2.reg)); \
                if (op2.ty == EoTy_Imm) \
                    ASSEMBLE_ONE(s_##NAME##_i_i(op1.imm, op2.imm)); \
            } \
        } \
        break

// `NAME dst, lhs, rhs` -- dst and lhs are registers, rhs is reg|imm.
#define CASE_BINARY_OP(NAME) \
    case M_##NAME: \
        if (line->ops_size == 3) { \
            EVAL_OPERAND(dst, 0); \
            EVAL_OPERAND(op1, 1); \
            EVAL_OPERAND(op2, 2); \
            if (dst.ty == EoTy_Reg && op1.ty == EoTy_Reg) { \
                if (op2.ty == EoTy_Reg) \
                    ASSEMBLE_ONE(s_##NAME##_r(dst.reg, op1.reg, op2.reg)); \
                if (op2.ty == EoTy_Imm) \
                    ASSEMBLE_ONE(s_##NAME##_i(dst.reg, op1.reg, op2.imm)); \
            } \
        } \
        break

    // Linear lookup; M_err (index 0) is what remains when nothing matches.
    size_t mnemonics_amount = sizeof(mnemonic_str) / sizeof(mnemonic_str[0]);
    Mnemonic m = M_err;
    for (size_t i = 0; i < mnemonics_amount; ++i) {
        if (strcmp(mnemonic_str[i], line->op) == 0) {
            m = (Mnemonic)i;
            break;
        }
    }

    switch (m) {
        case M_err: {
            REPORTF_ERROR("unrecognized mnemonic '%s'", line->op);
            reporter_print_loc(rep, line->loc);
            return 0;
        }
        case M_d8: {
            if (line->ops_size > 64) {
                reporter_error_with_loc(
                    rep, "too many operands (max is 64)", line->loc);
                return 0;
            }
            // Fixed staging area: nothing to free on the early returns, and
            // 128 bytes pack into at most 64 data words, the size of the
            // chunk main() allocates. String operands can be arbitrarily
            // long, so every append is bounds-checked.
            uint8_t bytes[128];
            size_t bytes_size = 0;
            for (size_t i = 0; i < line->ops_size; ++i) {
                EVAL_OPERAND(val, i);
                switch (val.ty) {
                    case EoTy_Imm:
                        if (bytes_size >= sizeof(bytes)) {
                            reporter_error_with_loc(rep,
                                "too much data (max is 128 bytes)",
                                line->ops[i]->loc);
                            return 0;
                        }
                        bytes[bytes_size++] = (uint8_t)val.imm;
                        break;
                    case EoTy_Str: {
                        if (line->ops[i]->str_len
                            > sizeof(bytes) - bytes_size) {
                            reporter_error_with_loc(rep,
                                "too much data (max is 128 bytes)",
                                line->ops[i]->loc);
                            return 0;
                        }
                        for (size_t si = 0; si < line->ops[i]->str_len;
                            ++si) {
                            bytes[bytes_size++]
                                = (uint8_t)line->ops[i]->str[si];
                        }
                        break;
                    }
                    default:
                        reporter_error_with_loc(
                            rep, "invalid operand", line->ops[i]->loc);
                        return 0;
                }
            }
            uint16_t ip_diff = 0;
            for (size_t i = 0; i < bytes_size; i += 2) {
                // Little endian: even byte is the low half, odd byte the
                // high half. A trailing odd byte is padded with zero.
                uint16_t data = bytes[i];
                if (i + 1 < bytes_size) {
                    data |= (uint16_t)((uint16_t)bytes[i + 1] << 8);
                }
                Line l = s_data_i(data);
                ip_diff += assemble_line(object, &l);
            }
            return ip_diff;
        }
        case M_d16: {
            if (line->ops_size > 32) {
                reporter_error_with_loc(
                    rep, "too many operands (max is 32)", line->loc);
                return 0;
            }
            uint16_t ip_diff = 0;
            for (size_t i = 0; i < line->ops_size; ++i) {
                EVAL_OPERAND(val, i);
                if (val.ty != EoTy_Imm) {
                    reporter_error_with_loc(
                        rep, "invalid operand", line->ops[i]->loc);
                    return 0;
                }
                Line l = s_data_i(val.imm);
                ip_diff += assemble_line(object, &l);
            }
            return ip_diff;
        }

        CASE_NO_OPERANDS(nop);
        CASE_NO_OPERANDS(hlt);
        CASE_NO_OPERANDS(ret);
        CASE_NO_OPERANDS(retf);
        CASE_NO_OPERANDS(iret);

        CASE_TARGET(jmp);
        CASE_TARGET(call);
        CASE_TARGET(lit);

        CASE_REG_SRC(jnz);
        CASE_REG_SRC(cmp);
        CASE_REG_SRC(in);

        CASE_SRC_SRC(jmpf);
        CASE_SRC_SRC(out);
        CASE_SRC_SRC(callf);

        case M_mov:
            if (line->ops_size == 2) {
                EVAL_OPERAND(op1, 0);
                EVAL_OPERAND(op2, 1);
                // Loads: register destination, any source kind. Plain
                // register/immediate sources always use the 16-bit form.
                if (op1.ty == EoTy_Reg) {
                    if (op2.ty == EoTy_Reg)
                        ASSEMBLE_ONE(s_mov16_r_r(op1.reg, op2.reg));
                    if (op2.ty == EoTy_Imm)
                        ASSEMBLE_ONE(s_mov16_r_i(op1.reg, op2.imm));
                    if (op2.ty == EoTy_Mem8Reg)
                        ASSEMBLE_ONE(
                            s_mov8_r_mr(op1.reg, op2.reg, op2.offset));
                    if (op2.ty == EoTy_Mem8Imm)
                        ASSEMBLE_ONE(s_mov8_r_mi(op1.reg, op2.imm));
                    if (op2.ty == EoTy_MemU16Reg)
                        ASSEMBLE_ONE(
                            s_mov16_r_mr(op1.reg, op2.reg, op2.offset));
                    if (op2.ty == EoTy_MemU16Imm)
                        ASSEMBLE_ONE(s_mov16_r_mi(op1.reg, op2.imm));
                }
                // Stores: memory destination, register or immediate source.
                if (op1.ty == EoTy_Mem8Reg) {
                    if (op2.ty == EoTy_Reg)
                        ASSEMBLE_ONE(
                            s_mov8_mr_r(op1.reg, op1.offset, op2.reg));
                    if (op2.ty == EoTy_Imm)
                        ASSEMBLE_ONE(
                            s_mov8_mr_i(op1.reg, op1.offset, op2.imm));
                }
                if (op1.ty == EoTy_Mem8Imm) {
                    if (op2.ty == EoTy_Reg)
                        ASSEMBLE_ONE(s_mov8_mi_r(op1.imm, op2.reg));
                    if (op2.ty == EoTy_Imm)
                        ASSEMBLE_ONE(s_mov8_mi_i(op1.imm, op2.imm));
                }
                if (op1.ty == EoTy_MemU16Reg) {
                    if (op2.ty == EoTy_Reg)
                        ASSEMBLE_ONE(
                            s_mov16_mr_r(op1.reg, op1.offset, op2.reg));
                    if (op2.ty == EoTy_Imm)
                        ASSEMBLE_ONE(
                            s_mov16_mr_i(op1.reg, op1.offset, op2.imm));
                }
                if (op1.ty == EoTy_MemU16Imm) {
                    if (op2.ty == EoTy_Reg)
                        ASSEMBLE_ONE(s_mov16_mi_r(op1.imm, op2.reg));
                    if (op2.ty == EoTy_Imm)
                        ASSEMBLE_ONE(s_mov16_mi_i(op1.imm, op2.imm));
                }
            }
            break;

        case M_int:
            if (line->ops_size == 1) {
                EVAL_OPERAND(op1, 0);
                if (op1.ty == EoTy_Imm) {
                    if (op1.imm > 0xff) {
                        reporter_error_with_loc(
                            rep, "interrupt id exceeds 1 byte", line->loc);
                        return 0;
                    }
                    ASSEMBLE_ONE(s_int((uint8_t)op1.imm));
                }
            }
            break;

        CASE_BINARY_OP(or);
        CASE_BINARY_OP(xor);
        CASE_BINARY_OP(and);
        CASE_BINARY_OP(shl);
        CASE_BINARY_OP(rshl);
        CASE_BINARY_OP(shr);
        CASE_BINARY_OP(rshr);
        CASE_BINARY_OP(add);
        CASE_BINARY_OP(sub);
        CASE_BINARY_OP(rsub);
        CASE_BINARY_OP(mul);
        CASE_BINARY_OP(imul);
        CASE_BINARY_OP(div);
        CASE_BINARY_OP(idiv);
        CASE_BINARY_OP(rdiv);
        CASE_BINARY_OP(ridiv);
        CASE_BINARY_OP(mod);
        CASE_BINARY_OP(rmod);

        // push/pop are pseudo instructions: each expands to an Rsp
        // adjustment plus a 16-bit move through [Rsp].
        case M_push:
            if (line->ops_size == 1) {
                EVAL_OPERAND(op1, 0);
                if (op1.ty == EoTy_Reg) {
                    uint16_t size = 0;
                    Line l;
                    l = s_add_i(Rsp, Rsp, 2);
                    size += assemble_line(object, &l);
                    l = s_mov16_mr_r(Rsp, 0, op1.reg);
                    size += assemble_line(object, &l);
                    return size;
                }
                if (op1.ty == EoTy_Imm) {
                    uint16_t size = 0;
                    Line l;
                    l = s_add_i(Rsp, Rsp, 2);
                    size += assemble_line(object, &l);
                    l = s_mov16_mr_i(Rsp, 0, op1.imm);
                    size += assemble_line(object, &l);
                    return size;
                }
            }
            break;
        case M_pop:
            if (line->ops_size == 1) {
                EVAL_OPERAND(op1, 0);
                if (op1.ty == EoTy_Reg) {
                    uint16_t size = 0;
                    Line l;
                    l = s_mov16_r_mr(op1.reg, Rsp, 0);
                    size += assemble_line(object, &l);
                    l = s_sub_i(Rsp, Rsp, 2);
                    size += assemble_line(object, &l);
                    return size;
                }
            }
            break;
    }

    // Known mnemonic, but no operand shape matched.
    reporter_error_with_loc(rep, "malformed instruction", line->loc);
    return 0;
#undef CHECK_OPERAND
#undef ASSEMBLE_ONE
#undef EVAL_OPERAND
#undef CASE_NO_OPERANDS
#undef CASE_TARGET
#undef CASE_REG_SRC
#undef CASE_SRC_SRC
#undef CASE_BINARY_OP
}
// Command-line configuration, filled in by parse_args().
typedef struct {
// Path of the assembly source; main() opens it for reading.
const char* input_file;
// Path of the assembled output; main() opens it with "wb".
// parse_args() falls back to "out.o" when no -o option is given.
const char* output_file;
} Args;
// Forward declaration; the definition follows main().
static inline Args parse_args(int argc, char** argv);
int main(int argc, char** argv)
{
int res = 0;
Args args = parse_args(argc, argv);
FILE* input_fp = fopen(args.input_file, "r");
if (!input_fp) {
REPORTF_ERROR("could not open input file '%s': %s",
args.input_file,
strerror(errno));
return -1;
}
fseek(input_fp, 0L, SEEK_END);
size_t file_size = (size_t)ftell(input_fp);
rewind(input_fp);
char* input_text = calloc(file_size + 1, sizeof(char));
size_t bytes_read = fread(input_text, sizeof(char), file_size, input_fp);
fclose(input_fp);
if (bytes_read != file_size) {
REPORTF_ERROR("could not read input file '%s': %s",
args.input_file,
strerror(errno));
return -1;
}
bool errors_occured = false;
Parser parser;
parser_construct(&parser, args.input_file, input_text);
Reporter rep = {
.filename = parser.lexer.filename,
.text = parser.lexer.text,
.text_len = parser.lexer.text_len,
};
size_t lines_capacity = 1024;
PLine** lines = malloc(sizeof(PLine*) * lines_capacity);
size_t lines_size = 0;
while (!parser_done(&parser)) {
if (lines_size + 1 > lines_capacity) {
lines_capacity += 2;
lines = realloc(lines, sizeof(PLine*) * lines_capacity);
}
PLine* line = parser_next(&parser);
if (!line) {
continue;
}
lines[lines_size++] = line;
}
errors_occured &= parser_error_occured(&parser);
IdentResolver resolver;
ident_resolver_construct(&resolver);
OperandEvaluator evaluator = {
.re = &resolver,
.rep = &rep,
.unresolve_is_error = false,
};
size_t chunk_capacity = 64;
uint16_t* chunk = malloc(sizeof(uint16_t) * chunk_capacity);
uint16_t ip = 0;
for (size_t i = 0; i < lines_size; ++i) {
int res = define_labels(&resolver, lines[i]->labels, ip, &rep);
if (res != 0)
errors_occured = true;
ip += pline_assemble(&evaluator, chunk, lines[i], &rep);
if (ip == 0)
errors_occured = true;
}
if (errors_occured) {
fprintf(stderr, "nothing written. stopping...\n");
res = -1;
goto leave_free_chunk;
}
evaluator.unresolve_is_error = true;
FILE* output_fp = fopen(args.output_file, "wb");
if (!output_fp) {
REPORTF_ERROR("could not open output file '%s': %s",
args.output_file,
strerror(errno));
res = -1;
goto leave_free_chunk;
}
size_t total_bytes_written = 0;
for (size_t i = 0; i < lines_size; ++i) {
use_labels(&resolver, lines[i]->labels);
size_t size = pline_assemble(&evaluator, chunk, lines[i], &rep);
if (size == 0) {
errors_occured = true;
}
if (errors_occured)
continue;
size_t bytes_written = fwrite(chunk, sizeof(uint16_t), size, output_fp);
total_bytes_written += bytes_written;
if (bytes_written != size) {
REPORTF_ERROR("could not write to output file '%s': %s",
args.output_file,
strerror(errno));
errors_occured = true;
}
}
fclose(output_fp);
if (errors_occured) {
fprintf(
stderr, "%ld bytes written. stopping...\n", total_bytes_written);
res = -1;
goto leave_free_chunk;
}
res = 0;
leave_free_chunk:
free(chunk);
// leave_free_lines:
for (size_t i = 0; i < lines_size; ++i) {
pline_free(lines[i]);
}
free(lines);
free(input_text);
ident_resolver_destroy(&resolver);
return res;
}
static inline Args parse_args(int argc, char** argv)
{
const char* input_file = NULL;
const char* output_file = NULL;
for (int i = 1; i < argc; ++i) {
if (strcmp(argv[i], "-o") == 0) {
i += 1;
if (i >= argc) {
REPORTF_ERROR("%s", "no filename given to -o");
exit(1);
}
output_file = argv[i];
} else {
if (input_file != NULL) {
REPORTF_ERROR("%s", "multiple input files specified");
exit(1);
}
input_file = argv[i];
}
}
if (input_file == NULL) {
REPORTF_ERROR("%s", "no input file");
exit(1);
}
if (output_file == NULL) {
output_file = "out.o";
}
return (Args) {
input_file,
output_file,
};
}