add asm parser

2025-09-06 02:01:16 +02:00 · 2025-09-06 02:01:16 +02:00 · 9eb3922ad6
commit 9eb3922ad6
parent 8b4a303315
5 changed files with 1072 additions and 2 deletions
--- a/asm/main.c
+++ b/asm/main.c
@ -1,6 +1,114 @@
+#include "parse.h"
+#include "report.h"
+#include <errno.h>
 #include <stdio.h>
+#include <stdlib.h>
+#include <string.h>

-int main(void)
+/* Command-line options collected by parse_args(). Both members point into
+ * argv; nothing here is heap-allocated. */
+typedef struct {
+    char* input_file; /* the single positional argument */
+    char* output_file; /* argument following "-o"; nullptr when absent */
+} Args;
+
+/* Fills *args from argv; returns 0 on success, 1 after printing an error. */
+static int parse_args(Args* args, int argc, char** argv);
+
+/* Returns the file's contents as a heap-allocated, NUL-terminated string, or
+ * NULL after printing an error. */
+static char* read_text_file(const char* filename);
+
+/* Releases one parsed line together with its mnemonic and operand trees. */
+static void free_line(PLine* line)
+{
+    for (size_t i = 0; i < line->ops_size; ++i) {
+        if (line->ops[i]) {
+            pexpr_free(line->ops[i]);
+        }
+    }
+    free(line->ident);
+    free(line);
+}
+
+/*
+ * Entry point: reads the input file named on the command line and parses it
+ * statement by statement.
+ *
+ * Every loop iteration consumes at least one token: a 'const' or 'include'
+ * statement, a label, or (when parser_parse_label() yields nothing) the rest
+ * of the current line through parser_parse_line(). That guarantees the loop
+ * terminates on malformed input.
+ *
+ * Returns EXIT_SUCCESS only when the whole file was parsed without the lexer
+ * or parser reporting an error.
+ */
+int main(int argc, char** argv)
 {
-    puts("asm");
+    Args args;
+    if (parse_args(&args, argc, argv) != 0) {
+        return EXIT_FAILURE;
+    }
+
+    char* text = read_text_file(args.input_file);
+    if (!text) {
+        return EXIT_FAILURE;
+    }
+
+    int status = EXIT_FAILURE;
+    size_t lines_capacity = 1024;
+    size_t lines_size = 0;
+    PLine** lines = nullptr;
+
+    Parser* parser = parser_new(args.input_file, text);
+    if (!parser) {
+        fprintf(stderr, FMT_ERROR("out of memory"));
+        goto cleanup;
+    }
+
+    lines = malloc(sizeof(PLine*) * lines_capacity);
+    if (!lines) {
+        fprintf(stderr, FMT_ERROR("out of memory"));
+        goto cleanup;
+    }
+
+    for (;;) {
+        /* Statements are newline-separated; blank lines carry no meaning. */
+        parser_skip_newlines(parser);
+        if (parser_done(parser)) {
+            break;
+        }
+
+        if (parser_next_is_const(parser)) {
+            /* TODO: nothing consumes constants yet, so release them. */
+            PConst* stmt = parser_parse_const(parser);
+            if (stmt) {
+                if (stmt->value) {
+                    pexpr_free(stmt->value);
+                }
+                free(stmt->ident);
+                free(stmt);
+            }
+            continue;
+        }
+
+        if (parser_next_is_include(parser)) {
+            /* TODO: nothing consumes includes yet, so release them. */
+            PInclude* stmt = parser_parse_include(parser);
+            if (stmt) {
+                free(stmt->filename);
+                free(stmt);
+            }
+            continue;
+        }
+
+        /* TODO: labels are not attached to lines yet, so release them. */
+        PLabel* label = parser_parse_label(parser);
+        if (label) {
+            free(label->ident);
+            free(label);
+            continue;
+        }
+
+        /* No label here: either an instruction mnemonic was consumed or an
+         * error was reported. In both cases the rest of the line belongs to
+         * parser_parse_line().
+         * NOTE(review): this assumes label parsing is re-armed after every
+         * line - confirm against the parser's `label_fail` handling. */
+        PLine* line = parser_parse_line(parser);
+        if (!line) {
+            continue;
+        }
+
+        if (lines_size == lines_capacity) {
+            size_t grown_capacity = lines_capacity * 2;
+            PLine** grown = realloc(lines, sizeof(PLine*) * grown_capacity);
+            if (!grown) {
+                free_line(line);
+                fprintf(stderr, FMT_ERROR("out of memory"));
+                goto cleanup;
+            }
+            lines = grown;
+            lines_capacity = grown_capacity;
+        }
+        lines[lines_size++] = line;
+    }
+
+    if (!parser_error_occured(parser)) {
+        status = EXIT_SUCCESS;
+    }
+
+cleanup:
+    for (size_t i = 0; i < lines_size; ++i) {
+        free_line(lines[i]);
+    }
+    free(lines);
+    if (parser) {
+        parser_free(parser);
+    }
+    free(text);
+    return status;
+}
+
+/*
+ * Interprets the command line.
+ *
+ * Recognised forms: `-o <file>` selects the output file; any other argument
+ * starting with '-' is rejected; exactly one remaining argument names the
+ * input file. The stored strings are the argv pointers themselves.
+ *
+ * Returns 0 on success and 1 after printing a diagnostic to stderr.
+ */
+int parse_args(Args* args, int argc, char** argv)
+{
+    args->input_file = nullptr;
+    args->output_file = nullptr;
+
+    for (int i = 1; i < argc; ++i) {
+        char* arg = argv[i];
+
+        if (strcmp(arg, "-o") == 0) {
+            if (i + 1 >= argc) {
+                fprintf(stderr, FMT_ERROR("expected filename after '-o'"));
+                return 1;
+            }
+            /* Consume the option's value together with the option. */
+            i += 1;
+            args->output_file = argv[i];
+            continue;
+        }
+
+        if (arg[0] == '-') {
+            fprintf(stderr, FMT_ERROR("unrecognized argument '%s'"), arg);
+            return 1;
+        }
+
+        if (args->input_file != nullptr) {
+            fprintf(stderr, FMT_ERROR("multiple input files"));
+            return 1;
+        }
+        args->input_file = arg;
+    }
+
+    if (args->input_file == nullptr) {
+        fprintf(stderr, FMT_ERROR("no input file"));
+        return 1;
+    }
+
+    return 0;
+}
+
+/*
+ * Reads a whole text file into memory.
+ *
+ * Returns a heap-allocated, NUL-terminated buffer that the caller must
+ * free(), or NULL after printing a diagnostic to stderr. Every failure path
+ * closes the stream and releases the buffer.
+ */
+char* read_text_file(const char* filename)
+{
+    FILE* fp = fopen(filename, "r");
+    if (!fp) {
+        fprintf(stderr,
+            FMT_ERROR("could not open file '%s' for reading: %s"),
+            filename,
+            strerror(errno));
+        return NULL;
+    }
+
+    char* text = NULL;
+    size_t file_size = 0;
+    size_t bytes_read = 0;
+
+    /* Determine the size by seeking to the end; ftell() reports -1 on
+     * failure, which must not be converted to a huge size_t. */
+    long end_pos = -1;
+    if (fseek(fp, 0L, SEEK_END) == 0) {
+        end_pos = ftell(fp);
+    }
+    if (end_pos < 0 || fseek(fp, 0L, SEEK_SET) != 0) {
+        goto fail;
+    }
+    file_size = (size_t)end_pos;
+
+    /* One extra zeroed byte keeps the buffer NUL-terminated. */
+    text = calloc(file_size + 1, sizeof(char));
+    if (!text) {
+        goto fail;
+    }
+
+    /* A text-mode stream may legitimately deliver fewer bytes than ftell()
+     * reported (newline translation), so only a stream error is fatal. */
+    bytes_read = fread(text, sizeof(char), file_size, fp);
+    if (bytes_read < file_size && ferror(fp)) {
+        goto fail;
+    }
+
+    fclose(fp);
+    return text;
+
+fail:
+    fprintf(stderr,
+        FMT_ERROR("could not read input file '%s': %s"),
+        filename,
+        strerror(errno));
+    free(text);
+    fclose(fp);
+    return NULL;
 }
--- a/asm/parse.c
+++ b/asm/parse.c
@ -0,0 +1,814 @@
+#include "parse.h"
+#include "report.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+/*
+ * Token kinds produced by the lexer.
+ *
+ * Single-character punctuation tokens use the character itself as their
+ * value, which lets the lexer cast the character straight to a TokTy and lets
+ * the parser write e.g. parser_eat(parser, ','). TT_DoubleLt and TT_DoubleGt
+ * have no explicit value and continue counting from '\n'.
+ */
+typedef enum {
+    TT_Err,
+    TT_Eof,
+    TT_Ident,
+    TT_Int,
+    TT_Binary,
+    TT_Hex,
+    TT_Char,
+    TT_Str,
+    TT_Newline = '\n',
+    TT_DoubleLt,
+    TT_DoubleGt,
+    TT_Pipe = '|',
+    TT_Hat = '^',
+    TT_Ampersand = '&',
+    TT_Plus = '+',
+    TT_Minus = '-',
+    TT_Asterisk = '*',
+    TT_Slash = '/',
+    TT_Percent = '%',
+    TT_LParen = '(',
+    TT_RParen = ')',
+    TT_LBracket = '[',
+    TT_RBracket = ']',
+    TT_Dot = '.',
+    TT_Comma = ',',
+    TT_Colon = ':',
+    TT_Exclamation = '!',
+} TokTy;
+
+/* One token. The text is not copied: it is the `len` bytes of the lexer's
+ * input starting at `loc.idx`. */
+typedef struct {
+    TokTy ty;
+    Loc loc; /* position of the token's first byte */
+    size_t len; /* number of bytes consumed for this token */
+} Tok;
+
+/* Cursor over the NUL-terminated input text. */
+typedef struct {
+    const char* filename; /* copied into every Loc that is produced */
+    const char* text;
+    size_t text_len; /* strlen(text), computed once in lexer_init() */
+    size_t idx; /* offset of the current character */
+    int line; /* starts at 1 */
+    int col; /* starts at 1, reset after each '\n' */
+    char ch; /* cached text[idx] */
+    bool error_occured; /* set by lexer_report() */
+} Lexer;
+
+/* Lexer entry points used by the parser. */
+static void lexer_init(Lexer* lexer, const char* filename, const char* text);
+static Tok lexer_next(Lexer* lexer);
+
+/* Lexer internals. */
+static void lexer_report(Lexer* lexer, const char* msg, Loc loc);
+static int lexer_skip_literal_char(Lexer* lexer);
+static Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc);
+static Loc lexer_loc(const Lexer* lexer);
+static void lexer_step(Lexer* lexer);
+static bool lexer_done(const Lexer* lexer);
+static bool str_includes(const char* str, char ch);
+
+/* Parser state: an embedded lexer plus a single token of lookahead. */
+struct Parser {
+    Lexer lexer;
+    Tok tok; /* current (not yet consumed) token */
+    Tok eaten; /* token most recently consumed by parser_eat() */
+    bool error_occured; /* set by parser_report() */
+
+    /* Set by parser_parse_label() when it consumed an identifier that was
+     * not followed by ':'; while set, parser_parse_label() returns nullptr. */
+    bool label_fail;
+    /* Identifier most recently consumed by parser_parse_label();
+     * parser_parse_line() uses it as the line's mnemonic. */
+    Tok last_ident_tok;
+};
+
+/* Operand grammar, outermost level first: 3 = memory operand wrapper,
+ * 2 = binary operators, 1 = unary operators, 0 = primary expressions. */
+static PExpr* parser_parse_operand_3(Parser* parser);
+static PExpr* parser_parse_operand_2(Parser* parser, int prec);
+static PExpr* parser_parse_operand_1(Parser* parser);
+static PExpr* parser_parse_operand_0(Parser* parser);
+/* Error recovery and reporting. */
+static void parser_skip_to_next_line(Parser* parser);
+static void parser_report(Parser* parser, const char* msg, Loc loc);
+/* Literal decoding and token-text helpers. */
+static char literal_char_val(const char* str);
+static char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok);
+static char* parser_tok_strdup(const Parser* parser, Tok tok);
+static bool parser_tok_streq(const Parser* parser, Tok tok, const char* text);
+/* Token-stream primitives. */
+static bool parser_eat(Parser* parser, TokTy ty);
+static bool parser_test(const Parser* parser, TokTy ty);
+static void parser_step(Parser* parser);
+
+/*
+ * Creates a parser over `text` and reads the first token.
+ *
+ * `filename` and `text` are borrowed, not copied, so both must outlive the
+ * parser. Returns nullptr when the allocation fails; release the result with
+ * parser_free().
+ */
+Parser* parser_new(const char* filename, const char* text)
+{
+    Parser* parser = malloc(sizeof *parser);
+    if (!parser) {
+        return nullptr;
+    }
+
+    *parser = (Parser) {
+        .lexer = {},
+        .tok = {},
+        .eaten = {},
+        .error_occured = false,
+
+        .label_fail = false,
+        .last_ident_tok = {},
+    };
+
+    lexer_init(&parser->lexer, filename, text);
+    /* Prime the one-token lookahead. */
+    parser->tok = lexer_next(&parser->lexer);
+
+    return parser;
+}
+
+/* Releases the parser object itself; the filename and text passed to
+ * parser_new() are not freed here. */
+void parser_free(Parser* parser)
+{
+    free(parser);
+}
+
+/* True when the current token is the identifier `const`. */
+bool parser_next_is_const(Parser* parser)
+{
+    if (!parser_test(parser, TT_Ident)) {
+        return false;
+    }
+    return parser_tok_streq(parser, parser->tok, "const");
+}
+
+/* True when the current token is the identifier `include`. */
+bool parser_next_is_include(Parser* parser)
+{
+    if (!parser_test(parser, TT_Ident)) {
+        return false;
+    }
+    return parser_tok_streq(parser, parser->tok, "include");
+}
+
+/*
+ * Parses `const <ident> <expr>`; the current token must be the `const`
+ * keyword (see parser_next_is_const()).
+ *
+ * Returns a heap-allocated statement owning `ident` and `value`, or nullptr
+ * when the name or the value could not be parsed or an allocation failed.
+ * A statement is never returned with a missing value.
+ */
+PConst* parser_parse_const(Parser* parser)
+{
+    Loc loc = parser->tok.loc;
+
+    /* Skip the `const` keyword itself. */
+    parser_step(parser);
+    if (!parser_eat(parser, TT_Ident)) {
+        parser_report(parser, "expected identifier", parser->tok.loc);
+        return nullptr;
+    }
+    char* ident = parser_tok_strdup(parser, parser->eaten);
+    /* Parse the value even if the copy failed so the tokens are consumed. */
+    PExpr* value = parser_parse_operand_3(parser);
+    if (!ident || !value) {
+        if (value) {
+            pexpr_free(value);
+        }
+        free(ident);
+        return nullptr;
+    }
+
+    PConst* stmt = malloc(sizeof *stmt);
+    if (!stmt) {
+        pexpr_free(value);
+        free(ident);
+        return nullptr;
+    }
+    *stmt = (PConst) { loc, ident, value };
+    return stmt;
+}
+
+/*
+ * Parses `include "<file>"`; the current token must be the `include` keyword
+ * (see parser_next_is_include()).
+ *
+ * Returns a heap-allocated statement owning the decoded filename, or nullptr
+ * when no string literal follows or an allocation failed.
+ */
+PInclude* parser_parse_include(Parser* parser)
+{
+    Loc loc = parser->tok.loc;
+    /* Skip the `include` keyword itself. */
+    parser_step(parser);
+    if (!parser_eat(parser, TT_Str)) {
+        parser_report(parser, "expected string", parser->tok.loc);
+        return nullptr;
+    }
+    size_t str_len;
+    char* str = parser_str_val(parser, &str_len, parser->eaten);
+    if (!str) {
+        return nullptr;
+    }
+
+    PInclude* stmt = malloc(sizeof *stmt);
+    if (!stmt) {
+        free(str);
+        return nullptr;
+    }
+    *stmt = (PInclude) { loc, str };
+    return stmt;
+}
+
+/*
+ * Tries to parse one label: `.name:` (local) or `name:`.
+ *
+ * Returns nullptr when
+ *  - the input is exhausted (also after skipping trailing blank lines),
+ *  - `label_fail` is set, i.e. an earlier call already consumed an identifier
+ *    that turned out not to be a label,
+ *  - an identifier without a following ':' was consumed: it is remembered in
+ *    `last_ident_tok` and `label_fail` is set, no error is reported,
+ *  - a syntax error was reported, or an allocation failed.
+ *
+ * The returned label and its `ident` are heap-allocated and owned by the
+ * caller.
+ */
+PLabel* parser_parse_label(Parser* parser)
+{
+    if (parser->tok.ty == TT_Eof || parser->label_fail)
+        return nullptr;
+
+    parser_skip_newlines(parser);
+    /* Blank lines at the end of the file are not a syntax error. */
+    if (parser_test(parser, TT_Eof))
+        return nullptr;
+
+    Loc loc = parser->tok.loc;
+    bool local = parser_eat(parser, '.');
+
+    if (!parser_eat(parser, TT_Ident)) {
+        parser_report(parser,
+            local ? "expected identifier" : "expected identifier or ':'",
+            parser->tok.loc);
+        return nullptr;
+    }
+    Tok ident_tok = parser->eaten;
+    if (!local)
+        parser->last_ident_tok = ident_tok;
+
+    if (!parser_eat(parser, ':')) {
+        if (local) {
+            parser_report(parser, "expected ':'", parser->tok.loc);
+        } else {
+            /* Not a label: the identifier is kept in last_ident_tok. */
+            parser->label_fail = true;
+        }
+        return nullptr;
+    }
+
+    char* ident = parser_tok_strdup(parser, ident_tok);
+    PLabel* label = malloc(sizeof *label);
+    if (!ident || !label) {
+        free(ident);
+        free(label);
+        return nullptr;
+    }
+    *label = (PLabel) { loc, ident, .local = local };
+    return label;
+}
+
+/*
+ * Parses the operands of one instruction line, up to and including the
+ * terminating newline(s).
+ *
+ * The mnemonic is not read here: it is the identifier parser_parse_label()
+ * stored in `last_ident_tok`. Calling this function clears `label_fail`, so
+ * labels can be parsed again for the following line.
+ *
+ * Returns a heap-allocated line owning its mnemonic and operands, or nullptr
+ * after a syntax error (the rest of the line is skipped) or a failed
+ * allocation.
+ */
+PLine* parser_parse_line(Parser* parser)
+{
+    constexpr size_t max_ops_size = 2;
+    static_assert(sizeof(((PLine*)nullptr)->ops) / sizeof(PExpr*)
+            == max_ops_size,
+        "operand limit must match PLine.ops");
+    PExpr* ops[max_ops_size];
+    size_t ops_size = 0;
+
+    /* The pending mnemonic is being consumed by this call. */
+    parser->label_fail = false;
+
+    if (!parser_test(parser, TT_Eof) && !parser_test(parser, '\n')) {
+        for (;;) {
+            PExpr* operand = parser_parse_operand_3(parser);
+            if (!operand) {
+                parser_skip_to_next_line(parser);
+                goto error_free_ops;
+            }
+            ops[ops_size++] = operand;
+
+            if (parser_test(parser, TT_Eof) || parser_test(parser, '\n')) {
+                break;
+            }
+            if (ops_size >= max_ops_size) {
+                parser_report(parser,
+                    "exceeded maximum number of operands (2)",
+                    parser->tok.loc);
+                parser_skip_to_next_line(parser);
+                goto error_free_ops;
+            }
+            if (!parser_eat(parser, ',')) {
+                parser_report(parser, "expected ','", parser->tok.loc);
+                parser_skip_to_next_line(parser);
+                goto error_free_ops;
+            }
+        }
+    }
+    if (!parser_eat(parser, '\n') && !parser_test(parser, TT_Eof)) {
+        parser_report(parser, "expected newline", parser->tok.loc);
+        goto error_free_ops;
+    }
+    parser_skip_newlines(parser);
+
+    PLine* line = malloc(sizeof *line);
+    if (!line) {
+        goto error_free_ops;
+    }
+    *line = (PLine) {
+        parser->last_ident_tok.loc,
+        parser_tok_strdup(parser, parser->last_ident_tok),
+        .ops = {},
+        .ops_size = ops_size,
+    };
+
+    for (size_t i = 0; i < ops_size; ++i)
+        line->ops[i] = ops[i];
+
+    return line;
+
+error_free_ops:
+    for (size_t i = 0; i < ops_size; ++i)
+        if (ops[i])
+            pexpr_free(ops[i]);
+    return nullptr;
+}
+
+/* Loosest binary precedence level; see the table in
+ * parser_parse_operand_2(). */
+static const int parser_binary_prec = 6;
+
+/*
+ * Parses a complete operand: either a memory operand written as
+ * `u8 [expr]` / `u16 [expr]`, or a plain expression.
+ *
+ * `u8` and `u16` only act as width keywords when directly followed by '[';
+ * otherwise the lexer is rewound and they are parsed as ordinary
+ * identifiers.
+ * NOTE(review): PExpr has no field for the access width, so `u8 [x]` and
+ * `u16 [x]` currently produce identical PExprTy_Mem nodes.
+ *
+ * Returns nullptr after a reported syntax error or a failed allocation.
+ */
+PExpr* parser_parse_operand_3(Parser* parser)
+{
+    Loc loc = parser->tok.loc;
+    if (parser_eat(parser, TT_LBracket)) {
+        parser_report(parser, "expected 'u8' or 'u16' before '['", loc);
+        return NULL;
+    }
+
+    bool width_keyword = parser_test(parser, TT_Ident)
+        && (parser_tok_streq(parser, parser->tok, "u8")
+            || parser_tok_streq(parser, parser->tok, "u16"));
+    if (width_keyword) {
+        /* Only one token of lookahead exists, so remember the state and
+         * rewind if no '[' follows the keyword. */
+        const Lexer saved_lexer = parser->lexer;
+        const Tok saved_tok = parser->tok;
+
+        parser_step(parser);
+        if (parser_eat(parser, '[')) {
+            PExpr* operand
+                = parser_parse_operand_2(parser, parser_binary_prec);
+            if (!operand) {
+                return nullptr;
+            }
+            if (!parser_eat(parser, ']')) {
+                parser_report(parser, "expected ']'", parser->tok.loc);
+                pexpr_free(operand);
+                return nullptr;
+            }
+            PExpr* expr = malloc(sizeof *expr);
+            if (!expr) {
+                pexpr_free(operand);
+                return nullptr;
+            }
+            *expr = (PExpr) {
+                .ty = PExprTy_Mem,
+                .loc = loc,
+                .operand = operand,
+            };
+            return expr;
+        }
+
+        parser->lexer = saved_lexer;
+        parser->tok = saved_tok;
+    }
+
+    return parser_parse_operand_2(parser, parser_binary_prec);
+}
+
+/*
+ * Parses left-associative binary expressions by precedence climbing.
+ *
+ * `prec` is the loosest level this call may consume; level 0 delegates to
+ * the unary parser. Levels, tightest first: 1 = * / %, 2 = + -, 3 = << >>,
+ * 4 = &, 5 = ^, 6 = |.
+ *
+ * Returns nullptr when either side of an operator fails to parse or an
+ * allocation fails; anything built so far is freed, so the result never
+ * contains a NULL child.
+ */
+PExpr* parser_parse_operand_2(Parser* parser, int prec)
+{
+    const PExprTy op_tys[] = {
+        PExprTy_Or,
+        PExprTy_Xor,
+        PExprTy_And,
+        PExprTy_Shr,
+        PExprTy_Shl,
+        PExprTy_Add,
+        PExprTy_Sub,
+        PExprTy_Mul,
+        PExprTy_Div,
+        PExprTy_Mod,
+    };
+    const TokTy op_tts[] = {
+        '|',
+        '^',
+        '&',
+        TT_DoubleGt,
+        TT_DoubleLt,
+        '+',
+        '-',
+        '*',
+        '/',
+        '%',
+    };
+    const int op_precs[] = { 6, 5, 4, 3, 3, 2, 2, 1, 1, 1 };
+    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
+            == sizeof(op_tts) / sizeof(op_tts[0]),
+        "misaligned");
+    static_assert(sizeof(op_tys) / sizeof(op_tys[0])
+            == sizeof(op_precs) / sizeof(op_precs[0]),
+        "misaligned");
+
+    if (prec == 0) {
+        return parser_parse_operand_1(parser);
+    }
+    PExpr* left = parser_parse_operand_2(parser, prec - 1);
+    if (!left) {
+        /* The error was already reported further down. */
+        return nullptr;
+    }
+    bool should_continue = true;
+    while (should_continue) {
+        should_continue = false;
+        for (size_t i = 0; i < sizeof(op_tys) / sizeof(op_tys[0]); ++i) {
+            if (prec >= op_precs[i] && parser_eat(parser, op_tts[i])) {
+                PExpr* right = parser_parse_operand_2(parser, prec - 1);
+                if (!right) {
+                    pexpr_free(left);
+                    return nullptr;
+                }
+
+                PExpr* new_left = malloc(sizeof *new_left);
+                if (!new_left) {
+                    pexpr_free(left);
+                    pexpr_free(right);
+                    return nullptr;
+                }
+                *new_left = (PExpr) {
+                    .ty = op_tys[i],
+                    .loc = left->loc,
+                    .left = left,
+                    .right = right,
+                };
+                left = new_left;
+
+                should_continue = true;
+                break;
+            }
+        }
+    }
+    return left;
+}
+
+/*
+ * Parses prefix operators: `-x` (negate) and `!x` (not), which may be
+ * stacked; anything else is handed to the primary-expression parser.
+ *
+ * Returns nullptr when the operand fails to parse or an allocation fails, so
+ * a unary node never wraps a NULL operand.
+ */
+PExpr* parser_parse_operand_1(Parser* parser)
+{
+    Loc loc = parser->tok.loc;
+
+    PExprTy ty;
+    if (parser_eat(parser, '-')) {
+        ty = PExprTy_Negate;
+    } else if (parser_eat(parser, '!')) {
+        ty = PExprTy_Not;
+    } else {
+        return parser_parse_operand_0(parser);
+    }
+
+    PExpr* operand = parser_parse_operand_1(parser);
+    if (!operand) {
+        return nullptr;
+    }
+
+    PExpr* expr = malloc(sizeof *expr);
+    if (!expr) {
+        pexpr_free(operand);
+        return nullptr;
+    }
+    *expr = (PExpr) {
+        .ty = ty,
+        .loc = loc,
+        .operand = operand,
+    };
+    return expr;
+}
+
+/* Allocates an immediate node; returns nullptr if the allocation fails. */
+static PExpr* parser_make_imm(Loc loc, uint16_t imm)
+{
+    PExpr* expr = malloc(sizeof *expr);
+    if (expr) {
+        *expr = (PExpr) {
+            .ty = PExprTy_Imm,
+            .loc = loc,
+            .imm = imm,
+        };
+    }
+    return expr;
+}
+
+/* Allocates a node of kind `ty` that takes ownership of `str`. On failure
+ * (including str == NULL) `str` is freed and nullptr is returned. */
+static PExpr* parser_make_str(PExprTy ty, Loc loc, char* str)
+{
+    PExpr* expr = str ? malloc(sizeof *expr) : nullptr;
+    if (!expr) {
+        free(str);
+        return nullptr;
+    }
+    *expr = (PExpr) {
+        .ty = ty,
+        .loc = loc,
+        .str = str,
+    };
+    return expr;
+}
+
+/* Converts the integer token just eaten into an immediate node. `prefix_len`
+ * bytes ("0b"/"0x") are skipped before converting in `base`. Values that do
+ * not fit in 16 bits are reported at the literal's own location. */
+static PExpr* parser_parse_int(
+    Parser* parser, Loc loc, size_t prefix_len, int base)
+{
+    char* str = parser_tok_strdup(parser, parser->eaten);
+    if (!str) {
+        return nullptr;
+    }
+    /* On overflow strtoull() yields ULLONG_MAX, which the range check
+     * below rejects as well. */
+    uint64_t val = strtoull(&str[prefix_len], NULL, base);
+    free(str);
+    if (val > 0xffff) {
+        parser_report(
+            parser, "integers larger than 65535 not supported", loc);
+        return nullptr;
+    }
+    return parser_make_imm(loc, (uint16_t)val);
+}
+
+/*
+ * Parses a primary expression: identifier, decimal/binary/hex integer,
+ * character literal, string literal, `.sublabel`, or a parenthesised
+ * expression.
+ *
+ * Returns a heap-allocated node owned by the caller, or nullptr after a
+ * reported syntax error or a failed allocation.
+ */
+PExpr* parser_parse_operand_0(Parser* parser)
+{
+    Loc loc = parser->tok.loc;
+    if (parser_eat(parser, TT_Ident)) {
+        char* ident = parser_tok_strdup(parser, parser->eaten);
+        return parser_make_str(PExprTy_Ident, loc, ident);
+    } else if (parser_eat(parser, TT_Int)) {
+        return parser_parse_int(parser, loc, 0, 10);
+    } else if (parser_eat(parser, TT_Binary)) {
+        return parser_parse_int(parser, loc, 2, 2);
+    } else if (parser_eat(parser, TT_Hex)) {
+        return parser_parse_int(parser, loc, 2, 16);
+    } else if (parser_eat(parser, TT_Char)) {
+        char* str = parser_tok_strdup(parser, parser->eaten);
+        if (!str) {
+            return nullptr;
+        }
+        /* Go through unsigned char so bytes >= 0x80 are not sign-extended
+         * into the upper half of the 16-bit immediate. */
+        uint16_t imm = (uint16_t)(unsigned char)literal_char_val(&str[1]);
+        free(str);
+        return parser_make_imm(loc, imm);
+    } else if (parser_eat(parser, TT_Str)) {
+        size_t str_len;
+        char* str = parser_str_val(parser, &str_len, parser->eaten);
+        return parser_make_str(PExprTy_Str, loc, str);
+    } else if (parser_eat(parser, '.')) {
+        if (!parser_eat(parser, TT_Ident)) {
+            parser_report(parser, "expected identifier", parser->tok.loc);
+            return NULL;
+        }
+        char* ident = parser_tok_strdup(parser, parser->eaten);
+        return parser_make_str(PExprTy_SubLabel, loc, ident);
+    } else if (parser_eat(parser, '(')) {
+        PExpr* operand = parser_parse_operand_2(parser, parser_binary_prec);
+        if (!operand) {
+            return NULL;
+        }
+        if (!parser_eat(parser, ')')) {
+            parser_report(parser, "expected ')'", parser->tok.loc);
+            pexpr_free(operand);
+            return NULL;
+        }
+        return operand;
+    } else {
+        parser_report(parser, "expected operand", parser->tok.loc);
+        return NULL;
+    }
+}
+
+/* Error recovery: discards tokens up to and including the next newline, or
+ * up to the end of input. */
+void parser_skip_to_next_line(Parser* parser)
+{
+    for (;;) {
+        if (parser_done(parser)) {
+            return;
+        }
+        if (parser_eat(parser, TT_Newline)) {
+            return;
+        }
+        parser_step(parser);
+    }
+}
+
+/* True once either the parser or its lexer has reported an error. */
+bool parser_error_occured(const Parser* parser)
+{
+    if (parser->error_occured) {
+        return true;
+    }
+    return parser->lexer.error_occured;
+}
+
+/* Consumes every consecutive newline token at the current position. */
+void parser_skip_newlines(Parser* parser)
+{
+    bool ate_newline;
+    do {
+        ate_newline = parser_eat(parser, '\n');
+    } while (ate_newline);
+}
+
+/* Marks the parse as failed and prints `msg` followed by the source line
+ * around `loc` to stderr. */
+void parser_report(Parser* parser, const char* msg, Loc loc)
+{
+    parser->error_occured = true;
+    fprintf(stderr, FMT_ERROR("%s"), msg);
+    loc_pretty_print(loc, parser->lexer.text, parser->lexer.text_len);
+}
+
+/*
+ * Decodes the string-literal token `tok` (including its surrounding quotes)
+ * into a freshly allocated, NUL-terminated buffer.
+ *
+ * Each escape sequence (`\n`, `\t`, `\0`, `\<c>`) yields exactly one output
+ * byte. `*str_len` receives the number of decoded bytes, which can exceed
+ * strlen() of the result when the literal contains `\0`.
+ *
+ * Returns nullptr (with `*str_len` set to 0) if an allocation fails; the
+ * caller frees the result.
+ */
+char* parser_str_val(const Parser* parser, size_t* str_len, Tok tok)
+{
+    *str_len = 0;
+
+    char* lit = parser_tok_strdup(parser, tok);
+    if (!lit) {
+        return nullptr;
+    }
+
+    /* Index of the closing quote; the body is lit[1 .. end). */
+    const size_t end = tok.len > 0 ? tok.len - 1 : 0;
+    /* The body has at most end - 1 bytes, plus one for the terminator. */
+    char* str = calloc(end > 0 ? end : 1, sizeof(char));
+    if (!str) {
+        free(lit);
+        return nullptr;
+    }
+
+    for (size_t i = 1; i < end; ++i) {
+        str[*str_len] = literal_char_val(&lit[i]);
+        *str_len += 1;
+        /* An escape spans two source bytes: skip the escaped character so
+         * it is not emitted a second time. */
+        if (lit[i] == '\\' && i + 1 < end) {
+            i += 1;
+        }
+    }
+    free(lit);
+    return str;
+}
+
+/*
+ * Decodes the character at the start of `str`.
+ *
+ * A backslash introduces an escape: `\0` is NUL, `\t` is tab, `\n` is
+ * newline, and any other escaped character stands for itself. Without a
+ * backslash the first character is returned unchanged.
+ */
+char literal_char_val(const char* str)
+{
+    if (str[0] != '\\') {
+        return str[0];
+    }
+
+    const char escaped = str[1];
+    if (escaped == '0') {
+        return 0;
+    }
+    if (escaped == 't') {
+        return '\t';
+    }
+    if (escaped == 'n') {
+        return '\n';
+    }
+    return escaped;
+}
+
+/* Returns a heap-allocated, NUL-terminated copy of the token's source text
+ * (whatever strndup() returns); the caller frees it. */
+char* parser_tok_strdup(const Parser* parser, Tok tok)
+{
+    return strndup(&parser->lexer.text[tok.loc.idx], tok.len);
+}
+
+/* True when the token's source text is exactly `text`. */
+bool parser_tok_streq(const Parser* parser, Tok tok, const char* text)
+{
+    if (tok.len != strlen(text)) {
+        return false;
+    }
+    const char* tok_text = &parser->lexer.text[tok.loc.idx];
+    return strncmp(tok_text, text, tok.len) == 0;
+}
+
+/* If the current token has kind `ty`, remembers it in `eaten`, advances to
+ * the next token and returns true; otherwise leaves the parser untouched. */
+bool parser_eat(Parser* parser, TokTy ty)
+{
+    if (!parser_test(parser, ty)) {
+        return false;
+    }
+    parser->eaten = parser->tok;
+    parser_step(parser);
+    return true;
+}
+
+/* True when the current (unconsumed) token has kind `ty`. */
+bool parser_test(const Parser* parser, TokTy ty)
+{
+    return parser->tok.ty == ty;
+}
+
+/* Replaces the current token with the next one from the lexer; `eaten` is
+ * not updated. */
+void parser_step(Parser* parser)
+{
+    parser->tok = lexer_next(&parser->lexer);
+}
+
+/* True when the current token is the end-of-file token. */
+bool parser_done(const Parser* parser)
+{
+    return parser->tok.ty == TT_Eof;
+}
+
+/* Points the lexer at the start of `text` (line 1, column 1). `filename` and
+ * `text` are stored as given, not copied. */
+void lexer_init(Lexer* lexer, const char* filename, const char* text)
+{
+    lexer->filename = filename;
+    lexer->text = text;
+    lexer->text_len = strlen(text);
+    lexer->idx = 0;
+    lexer->line = 1;
+    lexer->col = 1;
+    lexer->ch = text[0];
+    lexer->error_occured = false;
+}
+
+/*
+ * Scans and returns the next token.
+ *
+ * Spaces, tabs and `;` comments (up to, but not including, the newline) are
+ * skipped. Newlines are tokens of their own. Malformed input is reported via
+ * lexer_report() and yields a TT_Err token; the lexer always advances, so
+ * repeated calls eventually reach TT_Eof.
+ */
+Tok lexer_next(Lexer* lexer)
+{
+    const char* ident_chars = "abcdefghijklmnopqrstuvwxyz"
+                              "ABCDEFGHIJKLMNOPQRSTUVWXYZ_$";
+    const char* int_chars = "1234567890";
+    const char* hex_chars = "0123456789abcdefABCDEF";
+
+    Loc loc = lexer_loc(lexer);
+    if (lexer_done(lexer)) {
+        return lexer_tok(lexer, TT_Eof, loc);
+    }
+    if (lexer->ch == '\n') {
+        lexer_step(lexer);
+        return lexer_tok(lexer, '\n', loc);
+    } else if (str_includes(" \t", lexer->ch)) {
+        while (!lexer_done(lexer) && str_includes(" \t", lexer->ch)) {
+            lexer_step(lexer);
+        }
+        return lexer_next(lexer);
+    } else if (str_includes(ident_chars, lexer->ch)) {
+        /* Identifiers may contain digits after the first character. */
+        while (!lexer_done(lexer)
+            && (str_includes(ident_chars, lexer->ch)
+                || str_includes(int_chars, lexer->ch))) {
+            lexer_step(lexer);
+        }
+        return lexer_tok(lexer, TT_Ident, loc);
+    } else if (str_includes(int_chars, lexer->ch) && lexer->ch != '0') {
+        while (!lexer_done(lexer) && (str_includes(int_chars, lexer->ch))) {
+            lexer_step(lexer);
+        }
+        return lexer_tok(lexer, TT_Int, loc);
+    } else if (lexer->ch == ';') {
+        while (!lexer_done(lexer) && lexer->ch != '\n') {
+            lexer_step(lexer);
+        }
+        return lexer_next(lexer);
+    } else if (lexer->ch == '0') {
+        /* A leading '0' may start a "0b"/"0x" literal. */
+        lexer_step(lexer);
+        if (lexer->ch == 'b') {
+            lexer_step(lexer);
+            if (lexer_done(lexer) || !str_includes("01", lexer->ch)) {
+                lexer_report(lexer, "malformed binary literal", loc);
+                return lexer_tok(lexer, TT_Err, loc);
+            }
+            while (!lexer_done(lexer) && str_includes("01", lexer->ch)) {
+                lexer_step(lexer);
+            }
+            return lexer_tok(lexer, TT_Binary, loc);
+        } else if (lexer->ch == 'x') {
+            lexer_step(lexer);
+            if (lexer_done(lexer) || !str_includes(hex_chars, lexer->ch)) {
+                lexer_report(lexer, "malformed hex literal", loc);
+                return lexer_tok(lexer, TT_Err, loc);
+            }
+            while (!lexer_done(lexer) && str_includes(hex_chars, lexer->ch)) {
+                lexer_step(lexer);
+            }
+            return lexer_tok(lexer, TT_Hex, loc);
+
+        } else {
+            /* Plain decimal with leading zero(s): keep the remaining digits
+             * in the same token instead of splitting "012" into "0", "12". */
+            while (!lexer_done(lexer) && str_includes(int_chars, lexer->ch)) {
+                lexer_step(lexer);
+            }
+            return lexer_tok(lexer, TT_Int, loc);
+        }
+    } else if (lexer->ch == '\'') {
+        lexer_step(lexer);
+        lexer_skip_literal_char(lexer);
+        if (lexer_done(lexer) || lexer->ch != '\'') {
+            lexer_report(lexer, "malformed character literal", loc);
+            return lexer_tok(lexer, TT_Err, loc);
+        }
+        lexer_step(lexer);
+        return lexer_tok(lexer, TT_Char, loc);
+    } else if (lexer->ch == '"') {
+        lexer_step(lexer);
+        /* Escapes are skipped pairwise, so `\"` does not end the string. */
+        while (!lexer_done(lexer) && lexer->ch != '"') {
+            lexer_skip_literal_char(lexer);
+        }
+        if (lexer_done(lexer) || lexer->ch != '"') {
+            lexer_report(lexer, "malformed string literal", loc);
+            return lexer_tok(lexer, TT_Err, loc);
+        }
+        lexer_step(lexer);
+        return lexer_tok(lexer, TT_Str, loc);
+    } else if (lexer->ch == '<') {
+        lexer_step(lexer);
+        if (!lexer_done(lexer) && lexer->ch == '<') {
+            lexer_step(lexer);
+            return lexer_tok(lexer, TT_DoubleLt, loc);
+        } else {
+            lexer_report(lexer, "expected '<'", loc);
+            return lexer_tok(lexer, TT_Err, loc);
+        }
+    } else if (lexer->ch == '>') {
+        lexer_step(lexer);
+        if (!lexer_done(lexer) && lexer->ch == '>') {
+            lexer_step(lexer);
+            return lexer_tok(lexer, TT_DoubleGt, loc);
+        } else {
+            lexer_report(lexer, "expected '>'", loc);
+            return lexer_tok(lexer, TT_Err, loc);
+        }
+    } else if (str_includes("|^&+-*/%()[].,:!", lexer->ch)) {
+        /* Punctuation tokens use the character code as their TokTy. */
+        char ch = lexer->ch;
+        lexer_step(lexer);
+        return lexer_tok(lexer, (TokTy)ch, loc);
+    } else {
+        lexer_report(lexer, "illegal character", loc);
+        lexer_step(lexer);
+        return lexer_tok(lexer, TT_Err, loc);
+    }
+}
+
+/* Steps over one character of a char/string literal; a backslash escape
+ * counts as a single character. Returns -1 when the input ends right after a
+ * backslash, 0 otherwise. */
+int lexer_skip_literal_char(Lexer* lexer)
+{
+    const bool is_escape = lexer->ch == '\\';
+    lexer_step(lexer);
+    if (!is_escape) {
+        return 0;
+    }
+    if (lexer_done(lexer)) {
+        return -1;
+    }
+    lexer_step(lexer);
+    return 0;
+}
+
+/* Advances by one character, keeping line/column in sync. Does nothing once
+ * the end of the text has been reached. */
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer)) {
+        return;
+    }
+
+    const bool at_newline = lexer->ch == '\n';
+    lexer->line += at_newline ? 1 : 0;
+    lexer->col = at_newline ? 1 : lexer->col + 1;
+
+    lexer->idx += 1;
+    lexer->ch = lexer->text[lexer->idx];
+}
+
+/* Marks the lex as failed and prints `msg` followed by the source line
+ * around `loc` to stderr. */
+void lexer_report(Lexer* lexer, const char* msg, Loc loc)
+{
+    lexer->error_occured = true;
+    fprintf(stderr, FMT_ERROR("%s"), msg);
+    loc_pretty_print(loc, lexer->text, lexer->text_len);
+}
+
+/* Snapshot of the lexer's current position as a Loc. */
+Loc lexer_loc(const Lexer* lexer)
+{
+    return (Loc) {
+        .filename = lexer->filename,
+        .idx = lexer->idx,
+        .line = lexer->line,
+        .col = lexer->col,
+    };
+}
+
+/* True once the cursor has reached the end of the text. */
+bool lexer_done(const Lexer* lexer)
+{
+    return lexer->idx >= lexer->text_len;
+}
+
+/* Builds a token of kind `ty` spanning from `loc` to the lexer's current
+ * position. */
+Tok lexer_tok(const Lexer* lexer, TokTy ty, Loc loc)
+{
+    return (Tok) { .ty = ty, .loc = loc, .len = lexer->idx - loc.idx };
+}
+
+/* True when `ch` occurs in `str`. The terminating NUL never counts as a
+ * member, so str_includes(s, '\0') is always false. */
+bool str_includes(const char* str, char ch)
+{
+    /* strchr() would match the terminator, so rule that case out first. */
+    return ch != '\0' && strchr(str, ch) != NULL;
+}
+
+/*
+ * Recursively frees an expression tree together with the strings it owns.
+ *
+ * Like free(), accepts NULL and does nothing, so error paths can release a
+ * sub-expression that failed to parse without checking it first.
+ */
+void pexpr_free(PExpr* expr)
+{
+    if (!expr) {
+        return;
+    }
+    switch (expr->ty) {
+        case PExprTy_Err:
+        case PExprTy_Imm:
+            break;
+        case PExprTy_Ident:
+        case PExprTy_SubLabel:
+        case PExprTy_Str:
+            free(expr->str);
+            break;
+        case PExprTy_Mem:
+        case PExprTy_Not:
+        case PExprTy_Negate:
+            pexpr_free(expr->operand);
+            break;
+        case PExprTy_Or:
+        case PExprTy_Xor:
+        case PExprTy_And:
+        case PExprTy_Shl:
+        case PExprTy_Shr:
+        case PExprTy_Add:
+        case PExprTy_Sub:
+        case PExprTy_Mul:
+        case PExprTy_Div:
+        case PExprTy_Mod:
+            pexpr_free(expr->left);
+            pexpr_free(expr->right);
+            break;
+    }
+    free(expr);
+}
--- a/asm/parse.h
+++ b/asm/parse.h
@ -0,0 +1,90 @@
+#ifndef PARSE_H
+#define PARSE_H
+
+#include "report.h"
+#include <stddef.h>
+#include <stdint.h>
+
+/* Expression node kinds. The kind selects which member of the PExpr union
+ * is in use (see pexpr_free()). */
+typedef enum {
+    PExprTy_Err,
+    PExprTy_Ident, /* uses `str` */
+    PExprTy_SubLabel, /* `.name`; uses `str` */
+    PExprTy_Imm, /* uses `imm` */
+    PExprTy_Str, /* uses `str` */
+    PExprTy_Mem, /* `[expr]`; uses `operand` */
+    PExprTy_Not, /* `!x`; uses `operand` */
+    PExprTy_Negate, /* `-x`; uses `operand` */
+    /* Binary operators; all use `left` and `right`. */
+    PExprTy_Or,
+    PExprTy_Xor,
+    PExprTy_And,
+    PExprTy_Shl,
+    PExprTy_Shr,
+    PExprTy_Add,
+    PExprTy_Sub,
+    PExprTy_Mul,
+    PExprTy_Div,
+    PExprTy_Mod,
+} PExprTy;
+
+typedef struct PExpr PExpr;
+
+/* One node of an operand expression tree. Child nodes and `str` are owned
+ * by the node and released by pexpr_free(). */
+struct PExpr {
+    PExprTy ty;
+    Loc loc;
+    union {
+        char* str; /* Ident, SubLabel, Str */
+        uint16_t imm; /* Imm */
+        PExpr* operand; /* Mem, Not, Negate */
+        struct { /* binary operators */
+            PExpr* left;
+            PExpr* right;
+        };
+    };
+};
+
+/* Frees `expr` and, recursively, everything it owns. */
+void pexpr_free(PExpr* expr);
+
+/* `const <ident> <value>` statement, as built by parser_parse_const(). */
+typedef struct {
+    Loc loc; /* position of the `const` keyword */
+    char* ident; /* heap-allocated name */
+    PExpr* value;
+} PConst;
+
+/* `include "<filename>"` statement, as built by parser_parse_include(). */
+typedef struct {
+    Loc loc; /* position of the `include` keyword */
+    char* filename; /* heap-allocated, escapes already decoded */
+} PInclude;
+
+/* Label definition, as built by parser_parse_label(). */
+typedef struct {
+    Loc loc;
+    char* ident; /* heap-allocated name, without '.' or ':' */
+    bool local; /* true for the `.name:` form */
+} PLabel;
+
+/* One instruction line, as built by parser_parse_line(). */
+typedef struct {
+    Loc loc; /* position of the mnemonic */
+    char* ident; /* heap-allocated mnemonic text */
+    PExpr* ops[2]; /* only the first `ops_size` entries are filled in */
+    size_t ops_size;
+} PLine;
+
+/* Opaque parser handle; the definition lives in parse.c. */
+typedef struct Parser Parser;
+
+/* `filename` and `text` are borrowed and must outlive the parser. */
+Parser* parser_new(const char* filename, const char* text);
+void parser_free(Parser* parser);
+
+/* Consumes consecutive newline tokens. */
+void parser_skip_newlines(Parser* parser);
+
+/* Lookahead tests on the current token; they consume nothing. */
+bool parser_next_is_const(Parser* parser);
+bool parser_next_is_include(Parser* parser);
+/* NOTE(review): no definition of parser_next_is_label() is visible in
+ * parse.c - confirm it exists or drop the declaration. */
+bool parser_next_is_label(Parser* parser);
+
+/* Each parse function returns a heap-allocated node owned by the caller, or
+ * nullptr when nothing was produced. */
+PConst* parser_parse_const(Parser* parser);
+PInclude* parser_parse_include(Parser* parser);
+PLabel* parser_parse_label(Parser* parser);
+PLine* parser_parse_line(Parser* parser);
+
+/* parser_done(): the current token is end-of-file.
+ * parser_error_occured(): the lexer or parser has reported an error. */
+bool parser_done(const Parser* parser);
+bool parser_error_occured(const Parser* parser);
+
+#endif
--- a/asm/report.c
+++ b/asm/report.c
@ -0,0 +1,38 @@
+#include "report.h"
+#include <stdio.h>
+
+/*
+ * Prints the source line containing `loc` to stderr, framed with the file
+ * position and a caret under column `loc.col`.
+ *
+ * `text` is the complete source and `text_len` its length. A location that
+ * sits on a newline character (or at the very end of the text) is shown
+ * with the line it terminates, not the following one.
+ */
+void loc_pretty_print(Loc loc, const char* text, size_t text_len)
+{
+    /* Never look past the end of the text. */
+    size_t idx = loc.idx < text_len ? loc.idx : text_len;
+
+    /* Walk back to just after the previous newline. */
+    size_t line_start = idx;
+    while (line_start > 0 && text[line_start - 1] != '\n') {
+        line_start -= 1;
+    }
+    /* Walk forward to this line's newline or the end of the text. */
+    size_t line_end = idx;
+    while (line_end < text_len && text[line_end] != '\n') {
+        line_end += 1;
+    }
+    const char* line = &text[line_start];
+    int line_len = (int)(line_end - line_start);
+    /* "%*s" with an empty string pads to any width, so long lines are not
+     * limited by a fixed buffer of spaces. */
+    int caret_indent = loc.col > 1 ? loc.col - 1 : 0;
+
+    fprintf(stderr,
+        "    \x1b[96m--> ./%s:%d:%d\n     "
+        "\x1b[37m|\n\x1b[96m%5d\x1b[37m|\x1b[0m%.*s\n     "
+        "\x1b[37m|%*s\x1b[1;91m^\x1b[0m\n",
+        loc.filename,
+        loc.line,
+        loc.col,
+        loc.line,
+        line_len,
+        line,
+        caret_indent,
+        "");
+}
--- a/asm/report.h
+++ b/asm/report.h
@ -0,0 +1,20 @@
+#ifndef REPORT_H
+#define REPORT_H
+
+#include <stddef.h>
+#include <stdio.h>
+
+/* Wrap a string-literal format in a coloured "error:", "warning:" or
+ * "info:" prefix (ANSI escapes) and a trailing reset plus newline. FMT must
+ * be a string literal because it is joined by literal concatenation. */
+#define FMT_ERROR(FMT) "\x1b[1;91merror\x1b[1;97m: " FMT "\x1b[0m\n"
+#define FMT_WARNING(FMT) "\x1b[1;93mwarning\x1b[1;97m: " FMT "\x1b[0m\n"
+#define FMT_INFO(FMT) "\x1b[1;96minfo\x1b[1;97m: " FMT "\x1b[0m\n"
+
+/* A position in a source text. */
+typedef struct {
+    const char* filename; /* borrowed, not owned */
+    size_t idx; /* byte offset into the text */
+    int line; /* as counted by the lexer, starting at 1 */
+    int col; /* as counted by the lexer, starting at 1 */
+} Loc;
+
+/* Prints the source line around `loc` with a caret marker to stderr; `text`
+ * is the whole source and `text_len` its length. */
+void loc_pretty_print(Loc loc, const char* text, size_t text_len);
+
+#endif