From f21a08aeb6831fef21b762fd1fc007596f9d39d0 Mon Sep 17 00:00:00 2001 From: sfj Date: Mon, 5 Jan 2026 16:33:31 +0100 Subject: [PATCH] init --- .clang-format | 14 + .gitignore | 1 + Makefile | 14 + compile_flags.txt | 9 + example.script | 3 + main.cpp | 631 ++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 672 insertions(+) create mode 100644 .clang-format create mode 100644 .gitignore create mode 100644 Makefile create mode 100644 compile_flags.txt create mode 100644 example.script create mode 100644 main.cpp diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..a56cbd0 --- /dev/null +++ b/.clang-format @@ -0,0 +1,14 @@ +Language: Cpp +BasedOnStyle: WebKit +IndentWidth: 4 +ColumnLimit: 80 +IndentCaseLabels: true +InsertNewlineAtEOF: true +AllowShortFunctionsOnASingleLine: None + +BinPackArguments: false +AllowAllArgumentsOnNextLine: true + +BinPackParameters: false +AllowAllParametersOfDeclarationOnNextLine: true + diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c46263f --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +program diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..26db01d --- /dev/null +++ b/Makefile @@ -0,0 +1,14 @@ + +CXXFLAGS = \ + -std=c++23 \ + -Wall -Wextra \ + -pedantic-errors \ + -fsanitize=address,undefined + +program: main.cpp + g++ $^ -o $@ $(CXXFLAGS) + +run: program + ./program example.script + + diff --git a/compile_flags.txt b/compile_flags.txt new file mode 100644 index 0000000..863957f --- /dev/null +++ b/compile_flags.txt @@ -0,0 +1,9 @@ +-xc++ +-std=c++23 +-Wall +-Wextra +-Wpedantic +-Wconversion +-pedantic +-pedantic-errors + diff --git a/example.script b/example.script new file mode 100644 index 0000000..8b57e82 --- /dev/null +++ b/example.script @@ -0,0 +1,3 @@ + +print(+ 1 2) + diff --git a/main.cpp b/main.cpp new file mode 100644 index 0000000..a5cdf5f --- /dev/null +++ b/main.cpp @@ -0,0 +1,631 @@ +#include +#include +#include +#include +#include + 
+template // NOTE(review): template parameter lists and other <...> text appear to be missing throughout this copy - confirm against the original patch
+struct Result { // success-or-error value: `ok` selects which union member is meaningful
+    bool ok;
+    union {
+        V value; // set by result_ok
+        E error; // set by result_error
+    };
+};
+// Builds a Result with ok = true holding `value`.
+template
+Result result_ok(V value)
+{
+    return Result {
+        .ok = true,
+        .value = value,
+    };
+}
+// Builds a Result with ok = false holding `error`.
+template
+Result result_error(E error)
+{
+    return Result {
+        .ok = false,
+        .error = error,
+    };
+}
+// Function-pointer type for a routine that releases a T*.
+template
+using Deallocator = void (*)(T* ptr);
+// Pairs a raw pointer with the function that own_dealloc will call on it.
+template
+struct Own {
+    T* ptr;
+    Deallocator dealloc;
+};
+// Wraps an existing pointer in an Own. NOTE(review): takes Deallocator* while Own stores a Deallocator - looks like a stray `*`; not called in this view, confirm.
+template
+Own own(T* ptr, Deallocator* dealloc)
+{
+    return Own { ptr, dealloc };
+}
+// Allocates sizeof(T) bytes with malloc, assigns `init` into them, and returns the handle together with `dealloc`.
+template
+Own make_own(T init, Deallocator dealloc)
+{
+    T* ptr = (T*)malloc(sizeof(T)); // NOTE(review): malloc result is not checked before the write below
+    *ptr = init;
+    return Own { ptr, dealloc };
+}
+// Invokes the stored deallocator on the stored pointer; `own` is passed by value.
+template
+void own_dealloc(Own own)
+{
+    own.dealloc(own.ptr);
+}
+// Growable array: `data` buffer, `capacity` allocated element count, `len` elements in use.
+template
+struct Vec {
+    T* data;
+    size_t capacity;
+    size_t len;
+};
+// Resets *vec to the empty state (null data, zero capacity and len); does not free anything.
+template
+void vec_init(Vec* vec)
+{
+    *vec = Vec {
+        .data = nullptr,
+        .capacity = 0,
+        .len = 0,
+    };
+}
+// Ensures capacity >= min_size: a null buffer is first allocated with 8 elements, then capacity doubles until it fits.
+template
+void vec_reserve(Vec* vec, size_t min_size)
+{
+    if (!vec->data) {
+        vec->capacity = 8;
+        vec->data = (T*)malloc(sizeof(T) * vec->capacity); // NOTE(review): allocation result unchecked
+    }
+    if (min_size > vec->capacity) {
+        while (min_size > vec->capacity) {
+            vec->capacity *= 2;
+        }
+        vec->data = (T*)realloc(vec->data, sizeof(T) * vec->capacity); // NOTE(review): realloc result unchecked; on failure the old pointer would be overwritten with null
+    }
+}
+// Appends `v`, growing the buffer first if needed.
+template
+void vec_push(Vec* vec, T v)
+{
+    vec_reserve(vec, vec->len + 1);
+    vec->data[vec->len] = v;
+    vec->len += 1;
+}
+// Frees the buffer if one was allocated; the fields themselves are left unchanged.
+template
+void vec_deinit(Vec* vec)
+{
+    if (vec->data) {
+        free(vec->data);
+    }
+}
+// String is a Vec (element type not visible in this copy; the functions below store char data in it).
+typedef Vec String;
+// Initialises *str and pushes a single NUL. NOTE(review): this leaves len == 1, whereas format and string_from_strview set len to the character count excluding the NUL - confirm which convention is intended.
+void string_init(String* str)
+{
+    vec_init(str);
+    vec_push(str, '\0');
+}
+// Releases the string buffer via vec_deinit.
+void string_deinit(String* str)
+{
+    vec_deinit(str);
+}
+// Stores `ch` at index len, writes a NUL after it, and increments len. Not called in this view.
+void string_push(String* str, char ch)
+{
+    vec_reserve(str, str->len + 2); // room for the new character plus the terminator
+    str->data[str->len] = ch; // NOTE(review): right after string_init len is 1, so index 0 keeps its NUL - verify
+    str->data[str->len + 1] = '\0';
+    str->len += 1;
+}
+// Pointer-plus-length view over const elements.
+template
+struct Span {
+    const T* data;
+    size_t len;
+};
+// Convenience constructor for Span.
+template
+Span span(const T* data, size_t len)
+{
+    return Span { data, len };
+}
+// StrView is a Span (element type not visible in this copy; used for character data below).
+typedef Span StrView;
+// Returns a view of the first str->len elements of *str; the view points into str->data.
+StrView string_to_view(const String* str)
+{
+    return span(str->data, str->len);
+}
+// View of `count` elements starting at str->data + begin; no bounds checks are performed.
+StrView string_slice(const String* str, size_t begin, size_t count)
+{
+    return span(str->data + begin, count);
+}
+// View of `count` elements starting at str.data + begin; no bounds checks are performed.
+StrView strview_slice(StrView str, size_t begin, size_t count)
+{
+    return span(str.data + begin, count);
+}
+// Overwrites *string with the contents of `view` and NUL-terminates it; len becomes view.len.
+void string_from_strview(String* string, StrView view) {
+    vec_reserve(string, view.len + 1);
+    strncpy(string->data, view.data, view.len);
+    string->len = view.len;
+    string->data[string->len] = '\0';
+}
+// printf-style formatting into *str, overwriting previous contents. Returns 1 if the sizing snprintf fails, otherwise 0.
+template
+int format(String* str, const char* fmt, Args... args) {
+    int res = snprintf(nullptr, 0, fmt, args...); // measuring pass: null buffer, size 0
+    if (res < 0)
+        return 1;
+
+    size_t size = (size_t)res;
+    vec_reserve(str, size + 1);
+    snprintf(str->data, size + 1, fmt, args...); // result of this second call is not checked
+    str->len = size;
+    str->data[str->len] = '\0';
+    return 0;
+}
+// Formats into a temporary String and appends the result to *str.
+template
+void string_cat_fmt(String* str, const char* fmt, Args... args)
+{
+    String formatted;
+    string_init(&formatted);
+    format(&formatted, fmt, args...); // NOTE(review): return value ignored
+    vec_reserve(str, str->len + formatted.len + 1);
+
+    strncat(str->data, formatted.data, formatted.len); // appends at the first NUL in str->data, independent of str->len
+    str->len += formatted.len;
+    str->data[str->len] = '\0';
+
+    string_deinit(&formatted);
+}
+// Reads the whole file into *str. On success the Result holds the byte count; on failure it holds a String message, which main releases with string_deinit.
+Result read_file_to_string(String* str, const char* filename)
+{
+    FILE* file = fopen(filename, "r");
+    if (!file) {
+        String error_str;
+        string_init(&error_str);
+        format(&error_str, "couldn't open file '%s': %s", filename, strerror(errno));
+        return result_error(error_str);
+    }
+    fseek(file, 0, SEEK_END); // NOTE(review): fseek and ftell results are unchecked; a failing ftell returns -1, which the cast below turns into a huge size
+    size_t file_size = (size_t)ftell(file);
+    rewind(file);
+
+    vec_reserve(str, file_size + 1); // +1 for the terminator written below
+    size_t bytes_read = fread(str->data, 1, file_size, file);
+    str->len = bytes_read;
+    str->data[str->len] = '\0';
+    fclose(file);
+    if (bytes_read != file_size) { // NOTE(review): the file is opened in text mode; on platforms that translate line endings the count may differ from the ftell size - confirm
+        String error_str;
+        string_init(&error_str);
+        format(&error_str, "failed to read file");
+        return result_error(error_str);
+    }
+
+    return result_ok(bytes_read);
+}
+// Token kinds. Single-character tokens use their character code as the enumerator value; lexer_next relies on that when it casts a char to TokTy.
+enum TokTy {
+    TT_Eof,
+    TT_Ident,
+    TT_Int,
+    TT_LParen = '(',
+    TT_RParen = ')',
+    TT_Plus = '+',
+    TT_Comma = ',',
+};
+// One token: its kind, the slice of source text it covers, and the line recorded when lexer_next started on it.
+struct Tok {
+    TokTy ty;
+    StrView text;
+    int line;
+};
+// Lexer state: source view, current index, current line (starts at 1), and the character at idx.
+struct Lexer {
+    StrView text;
+    size_t idx;
+    int line;
+    char ch;
+};
+// Points the lexer at the start of `text`.
+void lexer_init(Lexer* lexer, StrView text)
+{
+    *lexer = Lexer {
+        .text = text,
+        .idx = 0,
+        .line = 1,
+        .ch = text.data[0], // NOTE(review): this read happens even when text.len == 0
+    };
+}
+// True once idx has reached the end of the view.
+static bool lexer_done(const Lexer* lexer)
+{
+    return lexer->idx >= lexer->text.len;
+}
+// Advances one character, incrementing `line` when leaving a newline; does nothing at end of input.
+static void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer))
+        return;
+    if (lexer->ch == '\n') {
+        lexer->line += 1;
+    }
+    lexer->idx += 1;
+    lexer->ch = lexer->text.data[lexer->idx]; // NOTE(review): idx may now equal text.len, so this reads one element past the view - presumably relies on the NUL that String keeps at data[len]; confirm for other callers
+}
+// Builds a token of kind `ty` covering source positions idx up to (not including) lexer->idx.
+static Tok lexer_tok(Lexer* lexer, TokTy ty, size_t idx, int line)
+{
+    return Tok { ty, strview_slice(lexer->text, idx, lexer->idx - idx), line };
+}
+// Returns the next token. Whitespace is skipped, illegal characters are reported on stderr and skipped, and TT_Eof is returned at end of input.
+Tok lexer_next(Lexer* lexer)
+{
+    size_t idx = lexer->idx; // start position and line of the token about to be read
+    int line = lexer->line;
+    if (lexer_done(lexer)) {
+        return lexer_tok(lexer, TT_Eof, idx, line);
+    }
+    if (isspace(lexer->ch)) { // NOTE(review): plain char is passed to the <ctype.h> classifiers here and below; a negative value would be outside their defined domain - confirm input is ASCII or cast to unsigned char
+        while (!lexer_done(lexer) && isspace(lexer->ch)) {
+            lexer_step(lexer);
+        }
+        return lexer_next(lexer); // restart after the whitespace run
+    }
+    if (isdigit(lexer->ch)) {
+        while (!lexer_done(lexer) && isdigit(lexer->ch)) {
+            lexer_step(lexer);
+        }
+        return lexer_tok(lexer, TT_Int, idx, line);
+    }
+    if (isalpha(lexer->ch)) { // identifiers start with a letter and continue with letters or digits
+        while (!lexer_done(lexer) && isalnum(lexer->ch)) {
+            lexer_step(lexer);
+        }
+        return lexer_tok(lexer, TT_Ident, idx, line);
+    }
+    const char* static_toks = "()+,";
+    for (size_t i = 0; i < strlen(static_toks); ++i) { // strlen is re-evaluated on every iteration
+        if (lexer->ch == static_toks[i]) {
+            lexer_step(lexer);
+            return lexer_tok(lexer, (TokTy)static_toks[i], idx, line); // enumerator values equal the character codes
+        }
+    }
+    fprintf(stderr, "error: illegal character '%c' on line %d\n", lexer->ch, line);
+    lexer_step(lexer);
+    return lexer_next(lexer); // skip the offending character and keep lexing
+}
+// Discriminator for the union inside Expr.
+enum ExprTy {
+    ET_Error,
+    ET_Ident,
+    ET_Int,
+    ET_Call,
+    ET_Add,
+};
+// Forward declaration, presumably because CallExpr and AddExpr hold Own handles to Expr - confirm against the original.
+struct Expr;
+// Call node: callee expression and argument list.
+struct CallExpr {
+    Own expr;
+    Vec> args;
+};
+// Addition node: left and right operands.
+struct AddExpr {
+    Own left;
+    Own right;
+};
+// AST node: `ty` selects the active union member; `line` is the token line recorded when the node was parsed.
+struct Expr {
+    ExprTy ty;
+    int line;
+    union {
+        int nothing; // placeholder member used by ET_Error nodes
+        String ident_value;
+        int64_t int_value;
+        CallExpr call_expr;
+        AddExpr add_expr;
+    };
+};
+// Forward declaration; defined after expr_deinit, which calls it.
+static void expr_vec_deinit(Vec>* exprs);
+// Releases whatever the active union member owns; does not free `expr` itself (expr_free does that).
+void expr_deinit(Expr* expr)
+{
+    switch (expr->ty) {
+        case ET_Error:
+            break;
+        case ET_Ident:
+            string_deinit(&expr->ident_value);
+            break;
+        case ET_Int:
+            break;
+        case ET_Call:
+            own_dealloc(expr->call_expr.expr);
+            expr_vec_deinit(&expr->call_expr.args);
+            break;
+        case ET_Add:
+            own_dealloc(expr->add_expr.left);
+            own_dealloc(expr->add_expr.right);
+            break;
+    }
+}
+// Deallocates every element, then frees the vector buffer.
+static void expr_vec_deinit(Vec>* exprs)
+{
+    for (size_t i = 0; i < exprs->len; ++i) {
+        own_dealloc(exprs->data[i]);
+    }
+    vec_deinit(exprs);
+}
+// Deallocator passed to make_own for Expr nodes: deinitialises the node, then frees its storage.
+void expr_free(Expr* expr)
+{
+    expr_deinit(expr);
+    free(expr);
+}
+// Appends a debug rendering of `expr` to *str, recursing into child nodes.
+void expr_to_string(String* str, const Expr* expr)
+{
+    switch (expr->ty){
+        case ET_Error:
+            string_cat_fmt(str, ""); // NOTE(review): appends an empty string - possibly a stripped <...> placeholder; confirm
+            break;
+        case ET_Ident:
+            string_cat_fmt(str, "Ident(\"%s\")", expr->ident_value.data);
+            break;
+        case ET_Int:
+            string_cat_fmt(str, "Int(%ld)", expr->int_value); // NOTE(review): %ld assumes int64_t is long; PRId64 would be portable
+            break;
+        case ET_Call:
+            string_cat_fmt(str, "Call { expr: ");
+            expr_to_string(str, expr->call_expr.expr.ptr);
+            string_cat_fmt(str, ", args: [");
+            for (size_t i = 0; i < expr->call_expr.args.len; ++i) {
+                if (i != 0) { // separator goes before every argument except the first
+                    string_cat_fmt(str, ", ");
+                }
+                expr_to_string(str, expr->call_expr.args.data[i].ptr);
+            }
+            string_cat_fmt(str, "] }");
+            break;
+        case ET_Add:
+            string_cat_fmt(str, "Add { left: ");
+            expr_to_string(str, expr->add_expr.left.ptr);
+            string_cat_fmt(str, ", right: ");
+            expr_to_string(str, expr->add_expr.right.ptr);
+            string_cat_fmt(str, " }");
+            break;
+    }
+}
+// Parser state: the lexer plus one token of lookahead.
+struct Parser {
+    Lexer lexer;
+    Tok tok;
+};
+// Creates a lexer over `text` and loads the first token into the lookahead.
+void parser_init(Parser* parser, StrView text)
+{
+    Lexer lexer;
+    lexer_init(&lexer, text);
+    Tok tok = lexer_next(&lexer);
+    *parser = Parser {
+        .lexer = lexer,
+        .tok = tok,
+    };
+}
+// Forward declarations for the parse functions, which call each other recursively.
+static Own parser_parse_postfix(Parser* parser);
+static Own parser_parse_prefix(Parser* parser);
+static Own parser_parse_operand(Parser* parser);
+// Advances the lookahead to the next token.
+static void parser_step(Parser* parser)
+{
+    parser->tok = lexer_next(&parser->lexer);
+}
+// Entry point: parses one expression starting at the current token.
+Own parser_parse_expr(Parser* parser)
+{
+    return parser_parse_prefix(parser);
+}
+// Parses prefix addition (a plus token followed by two prefix expressions); any other token falls through to postfix parsing.
+static Own parser_parse_prefix(Parser* parser)
+{
+    int line = parser->tok.line;
+    if (parser->tok.ty == '+') {
+        parser_step(parser);
+        Own left = parser_parse_prefix(parser);
+        Own right = parser_parse_prefix(parser);
+
+        return make_own(Expr {
+            .ty = ET_Add,
+            .line = line,
+            .add_expr = AddExpr {
+                .left = left,
+                .right = right,
+            }
+        }, expr_free);
+    }
+
+    return parser_parse_postfix(parser);
+}
+// Parses an operand followed by zero or more parenthesised argument lists, wrapping the result in an ET_Call node for each list.
+static Own parser_parse_postfix(Parser* parser)
+{
+    int line = parser->tok.line; // line where the operand starts; also used in the error messages below
+    Own expr = parser_parse_operand(parser);
+    while (true) {
+        if (parser->tok.ty == '(') {
+            parser_step(parser);
+
+            Vec> args;
+            vec_init(&args);
+
+            if (parser->tok.ty != TT_Eof && parser->tok.ty != ')') {
+                vec_push(&args, parser_parse_expr(parser));
+                while (parser->tok.ty != TT_Eof && parser->tok.ty != ')') {
+                    if (parser->tok.ty != TT_Comma) {
+                        fprintf(stderr, "error: expected ',' on line %d\n", line);
+                        return make_own( // NOTE(review): expr and args are not released on this path
+                            Expr { .ty = ET_Error, .line = line, .nothing = 0 }, expr_free);
+                    }
+                    parser_step(parser);
+                    if (parser->tok.ty == TT_Eof || parser->tok.ty == ')') // a comma directly before the closing parenthesis is accepted
+                        break;
+                    vec_push(&args, parser_parse_expr(parser));
+                }
+            }
+            if (parser->tok.ty != ')') {
+                fprintf(stderr, "error: expected ')' on line %d\n", line);
+                return make_own( // NOTE(review): expr and args are not released on this path
+                    Expr { .ty = ET_Error, .line = line, .nothing = 0 }, expr_free);
+            }
+            parser_step(parser);
+
+            expr = make_own(Expr { // the previous expr becomes the callee of the new call node
+                .ty = ET_Call,
+                .line = line,
+                .call_expr = CallExpr {
+                    .expr = expr,
+                    .args = args,
+                },
+            }, expr_free);
+
+            continue;
+        }
+        break;
+    }
+    return expr;
+}
+// Parses an identifier or an integer literal; any other token is reported on stderr, consumed, and yields an ET_Error node.
+static Own parser_parse_operand(Parser* parser)
+{
+    int line = parser->tok.line;
+    if (parser->tok.ty == TT_Ident) {
+        String value;
+        string_init(&value);
+        string_from_strview(&value, parser->tok.text); // copy the token text; the node keeps this String
+
+        parser_step(parser);
+        return make_own(Expr {
+            .ty = ET_Ident,
+            .line = line,
+            .ident_value = value,
+        }, expr_free);
+
+    } else if (parser->tok.ty == TT_Int) {
+        String text;
+        string_init(&text);
+        string_from_strview(&text, parser->tok.text); // NUL-terminated copy so strtoll can parse it
+        int64_t value = strtoll(text.data, nullptr, 10); // NOTE(review): out-of-range results are not detected
+        string_deinit(&text);
+
+        parser_step(parser);
+        return make_own(Expr {
+            .ty = ET_Int,
+            .line = line,
+            .int_value = value,
+        }, expr_free);
+    } else {
+        if (parser->tok.ty == TT_Eof) {
+            fprintf(stderr, "error: expected expression, got EOF on line %d\n", parser->tok.line);
+        } else {
+            fprintf(stderr, "error: expected expression, got '%.*s' on line %d\n", (int)parser->tok.text.len, parser->tok.text.data, parser->tok.line);
+        }
+        parser_step(parser); // consume the unexpected token
+    }
+    return make_own(
+        Expr { .ty = ET_Error, .line = line, .nothing = 0 }, expr_free);
+}
+// Evaluates the tree. Integer literals, addition, and calls to an identifier named print are handled; every other case returns -1.
+int64_t eval_expr(const Expr* expr)
+{
+    switch (expr->ty) {
+        case ET_Error:
+            return -1;
+        case ET_Ident:
+            return -1;
+        case ET_Int:
+            return expr->int_value;
+        case ET_Call:
+            if (expr->call_expr.expr.ptr->ty == ET_Ident) {
+                if (strcmp(expr->call_expr.expr.ptr->ident_value.data, "print") == 0) {
+                    int64_t value = eval_expr(expr->call_expr.args.data[0].ptr); // NOTE(review): no check that args.len > 0; with an empty argument list args.data is still null
+                    printf("%ld\n", value); // NOTE(review): %ld assumes int64_t is long; PRId64 would be portable
+                    return 0;
+                }
+            }
+            return -1;
+        case ET_Add: {
+            int64_t left = eval_expr(expr->add_expr.left.ptr);
+            int64_t right = eval_expr(expr->add_expr.right.ptr);
+            return left + right;
+        }
+    }
+    return -1;
+}
+// Takes the script path as argv[1]. Prints the source text, its tokens, the parsed AST, and then evaluates the AST.
+int main(int argc, const char** argv)
+{
+    if (argc <= 1) {
+        fprintf(stderr, "error: no filename\n");
+        return EXIT_FAILURE;
+    }
+    const char* filename = argv[1];
+
+    String text;
+    string_init(&text);
+    auto result = read_file_to_string(&text, filename);
+
+    if (!result.ok) {
+        fprintf(stderr, "error: %s\n", result.error.data);
+        string_deinit(&result.error);
+        return EXIT_FAILURE; // NOTE(review): text is not released on this path
+    }
+
+    printf("=== text ===\n%s\n", text.data);
+
+    Lexer lexer; // separate lexer used only for the token dump; the parser below creates its own
+    lexer_init(&lexer, string_to_view(&text));
+
+    printf("=== tokens ===\n");
+    Tok tok;
+    while ((tok = lexer_next(&lexer)).ty != TT_Eof) {
+        printf("%d\t\"%.*s\"\n", tok.ty, (int)tok.text.len, tok.text.data);
+    }
+
+    printf("=== ast ===\n");
+    Parser parser;
+    parser_init(&parser, string_to_view(&text));
+    Own ast = parser_parse_expr(&parser);
+
+    String ast_string;
+    string_init(&ast_string);
+    expr_to_string(&ast_string, ast.ptr);
+    printf("%s\n", ast_string.data);
+    string_deinit(&ast_string);
+
+    printf("=== eval ===\n");
+    eval_expr(ast.ptr); // return value discarded; any output comes from the print call inside eval_expr
+
+    own_dealloc(ast);
+    string_deinit(&text);
+}
+