From f4e1f65e8543863878f38ec64d6130b3181a42b0 Mon Sep 17 00:00:00 2001 From: SimonFJ20 Date: Tue, 29 Aug 2023 23:17:15 +0200 Subject: [PATCH] code split --- build.sh | 2 +- compile_files.txt | 7 + debug.sh | 2 +- emitter.c | 190 +++++++++++ emitter.h | 25 ++ expr.c | 192 ++++++++++++ expr.h | 49 +++ main.c | 783 ++-------------------------------------------- optimizer.c | 222 +++++++++++++ optimizer.h | 18 ++ parser.c | 169 ++++++++++ parser.h | 55 ++++ print.h | 27 ++ runtime.c | 7 + runtime.h | 9 + 15 files changed, 993 insertions(+), 764 deletions(-) create mode 100644 compile_files.txt create mode 100644 emitter.c create mode 100644 emitter.h create mode 100644 expr.c create mode 100644 expr.h create mode 100644 optimizer.c create mode 100644 optimizer.h create mode 100644 parser.c create mode 100644 parser.h create mode 100644 print.h create mode 100644 runtime.c create mode 100644 runtime.h diff --git a/build.sh b/build.sh index 82751fd..75df40e 100644 --- a/build.sh +++ b/build.sh @@ -1,4 +1,4 @@ #!/bin/bash -gcc -o program main.c -std=c17 -Wall -Wextra -Wpedantic -O3 +gcc -o program -std=c17 -Wall -Wextra -Wpedantic -O3 `cat compile_files.txt` diff --git a/compile_files.txt b/compile_files.txt new file mode 100644 index 0000000..714ecba --- /dev/null +++ b/compile_files.txt @@ -0,0 +1,7 @@ +main.c +expr.c +parser.c +optimizer.c +emitter.c +runtime.c + diff --git a/debug.sh b/debug.sh index 7550f41..8f072b2 100644 --- a/debug.sh +++ b/debug.sh @@ -2,7 +2,7 @@ set -xe -gcc -o program main.c -std=c17 -Wall -Wextra -Wpedantic -O3 -g -fsanitize=address,undefined +gcc -o program -std=c17 -Wall -Wextra -Wpedantic -O3 -g -fsanitize=address,undefined `cat compile_files.txt` ./program diff --git a/emitter.c b/emitter.c new file mode 100644 index 0000000..7bcae50 --- /dev/null +++ b/emitter.c @@ -0,0 +1,190 @@ +#include "emitter.h" +#include "expr.h" +#include "runtime.h" +#include +#include + +Emitter emitter_create(uint8_t* code_address) +{ + return (Emitter) { + 
.code = code_address, + .pos = 0, + .loop_counter = 0, + .cmp_flags_set = false, + }; +} + +void emitter_push_u8(Emitter* emitter, uint8_t value) +{ + emitter->code[emitter->pos] = value; + emitter->pos += 1; +} + +void emitter_push_u32(Emitter* emitter, uint32_t value) +{ + emitter->code[emitter->pos] = value & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 8) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 16) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = value >> 24; + emitter->pos += 1; +} + +void emitter_push_u64(Emitter* emitter, uint64_t value) +{ + emitter->code[emitter->pos] = value & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 8) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 16) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 24) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 32) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 40) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 48) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 56) & 0xFF; + emitter->pos += 1; +} + +void emitter_emit_expr(Emitter* emitter, Expr* expr) +{ + emitter->cmp_flags_set = false; + switch (expr->type) { + case ExprType_Error: + fprintf(stderr, "panic: emitter: program contained errors\n"); + exit(1); + break; + case ExprType_Incr: + // add BYTE [rbx], 1 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x03); + emitter_push_u8(emitter, expr->value); + emitter->cmp_flags_set = true; + break; + case ExprType_Decr: + // sub BYTE [rbx], 1 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x2b); + emitter_push_u8(emitter, expr->value); + emitter->cmp_flags_set = true; + break; + case ExprType_Left: + // sub rbx, 1 + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x83); + emitter_push_u8(emitter, 0xeb); + emitter_push_u8(emitter, 
expr->value); + break; + case ExprType_Right: + // add rbx, imm8 (imm8 = expr->value) + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x83); + emitter_push_u8(emitter, 0xc3); + emitter_push_u8(emitter, expr->value); + break; + case ExprType_Output: + // movzx edi, BYTE [rbx] + emitter_push_u8(emitter, 0x0f); + emitter_push_u8(emitter, 0xb6); + emitter_push_u8(emitter, 0x3b); + // movabs rax, put_char + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0xb8); + emitter_push_u64(emitter, (uint64_t)put_char); + // call rax + emitter_push_u8(emitter, 0xff); + emitter_push_u8(emitter, 0xd0); + break; + case ExprType_Input: + // movabs rax, get_char + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0xb8); + emitter_push_u64(emitter, (uint64_t)get_char); + // call rax + emitter_push_u8(emitter, 0xff); + emitter_push_u8(emitter, 0xd0); + // mov BYTE [rbx], al + emitter_push_u8(emitter, 0x88); + emitter_push_u8(emitter, 0x03); + break; + case ExprType_Loop: + fprintf(stderr, "panic: emitter: unexpected loop\n"); + exit(1); + break; + case ExprType_Zero: + // mov BYTE [rbx], 0 + emitter_push_u8(emitter, 0xc6); + emitter_push_u8(emitter, 0x03); + emitter_push_u8(emitter, 0x00); + emitter->cmp_flags_set = false; // mov does not write EFLAGS, so an enclosing loop must emit its own cmp + break; + } +} + +void emitter_emit_loop(Emitter* emitter, Expr* expr) +{ + int64_t start_loc = (int64_t)&emitter->code[emitter->pos]; + emitter_emit_expr_vec(emitter, &expr->exprs); + if (!emitter->cmp_flags_set) { + // cmp BYTE [rbx], 0 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x3b); + emitter_push_u8(emitter, 0x00); + } + + int64_t current_loc = (int64_t)&emitter->code[emitter->pos]; + int32_t relative_address = -(int32_t)(current_loc - start_loc); + if (relative_address >= -126) { + // jne rel8: displacement is relative_address - 2 (size of this jne) and must fit in [-128, 127] + emitter_push_u8(emitter, 0x75); + emitter_push_u8(emitter, (uint8_t)relative_address - 2); + } else { + // jne rel32: displacement is relative_address - 6 (size of this jne) + emitter_push_u8(emitter, 0x0f); + emitter_push_u8(emitter, 0x85); + emitter_push_u32(emitter, (uint32_t)relative_address - 6); + } +} + +void
emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec) +{ + for (size_t i = 0; i < vec->length; ++i) { + Expr* expr = &vec->data[i]; + if (expr->type == ExprType_Loop) { + emitter_emit_loop(emitter, expr); + } else { + emitter_emit_expr(emitter, expr); + } + } +} + +void emitter_emit_program(Emitter* emitter, ExprVec* program) +{ + // push rbp: + emitter_push_u8(emitter, 0x55); + // mov rbp, rsp + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x89); + emitter_push_u8(emitter, 0xe5); + // push rbx: + emitter_push_u8(emitter, 0x53); + // mov rbx, rdi + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x89); + emitter_push_u8(emitter, 0xfb); + + emitter_emit_expr_vec(emitter, program); + + // pop rbx + emitter_push_u8(emitter, 0x5b); + // pop rbp + emitter_push_u8(emitter, 0x5d); + // ret + emitter_push_u8(emitter, 0xc3); +} diff --git a/emitter.h b/emitter.h new file mode 100644 index 0000000..72111dd --- /dev/null +++ b/emitter.h @@ -0,0 +1,25 @@ +#ifndef EMITTER_H +#define EMITTER_H + +#include "expr.h" +#include +#include +#include + +typedef struct { + uint8_t* code; + size_t pos; + int loop_counter; + bool cmp_flags_set; +} Emitter; + +Emitter emitter_create(uint8_t* code_address); +void emitter_push_u8(Emitter* emitter, uint8_t value); +void emitter_push_u32(Emitter* emitter, uint32_t value); +void emitter_push_u64(Emitter* emitter, uint64_t value); +void emitter_emit_expr(Emitter* emitter, Expr* expr); +void emitter_emit_loop(Emitter* emitter, Expr* expr); +void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec); +void emitter_emit_program(Emitter* emitter, ExprVec* program); + +#endif diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..050d2d7 --- /dev/null +++ b/expr.c @@ -0,0 +1,192 @@ +#include "expr.h" +#include "print.h" +#include +#include +#include +#include + +void expr_vec_construct(ExprVec* vec) +{ + *vec = (ExprVec) { + .data = malloc(sizeof(Expr) * 8), + .capacity = 8, + .length = 0, + }; +} + +void
expr_vec_destroy(ExprVec* vec) { free(vec->data); } + +void expr_vec_free(ExprVec* vec) +{ + for (size_t i = 0; i < vec->length; ++i) { + expr_free(&vec->data[i]); + } + expr_vec_destroy(vec); +} + +void expr_vec_push(ExprVec* vec, Expr expr) +{ + if (vec->length + 1 > vec->capacity) { + vec->capacity *= 2; + vec->data = realloc(vec->data, sizeof(Expr) * vec->capacity); + } + vec->data[vec->length] = expr; + vec->length += 1; +} + +Expr expr_vec_pop(ExprVec* vec) +{ + vec->length -= 1; + return vec->data[vec->length]; +} + +bool expr_vec_equal(const ExprVec* self, const ExprVec* other) +{ + if (self->length != other->length) { + return false; + } + for (size_t i = 0; i < self->length; ++i) { + if (!expr_equal(&self->data[i], &other->data[i])) { + return false; + } + } + return true; +} + +void expr_free(Expr* expr) +{ + switch (expr->type) { + case ExprType_Loop: + expr_vec_free(&expr->exprs); + break; + default: + break; + } +} + +const char* expr_bracket_color(int depth) +{ + switch (depth % 3) { + case 0: + return color_bright_yellow; + case 1: + return color_magenta; + case 2: + return color_cyan; + } + return NULL; +} + +void expr_stringify_concat_value(Expr* expr, char* acc, int depth) +{ + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "("); + strcat(acc, color_reset); + char value[16] = { 0 }; + snprintf(value, 16, "%d", expr->value); + strcat(acc, value); + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, ")"); + strcat(acc, color_reset); +} + +void expr_vec_stringify(ExprVec* vec, char* acc, int depth) +{ + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "["); + strcat(acc, color_reset); + for (size_t i = 0; i < vec->length; ++i) { + if (i != 0) { + strcat(acc, " "); + } + expr_stringify(&vec->data[i], acc, depth + 1); + } + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "]"); + strcat(acc, color_reset); +} + +void 
expr_stringify(Expr* expr, char* acc, int depth) +{ + switch (expr->type) { + case ExprType_Error: + strcat(acc, color_bright_red); + strcat(acc, "Error"); + strcat(acc, color_reset); + break; + case ExprType_Incr: + strcat(acc, color_yellow); + strcat(acc, "Incr"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Decr: + strcat(acc, color_yellow); + strcat(acc, "Decr"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Left: + strcat(acc, color_green); + strcat(acc, "Left"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Right: + strcat(acc, color_green); + strcat(acc, "Right"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Output: + strcat(acc, color_bright_gray); + strcat(acc, "Output"); + strcat(acc, color_reset); + break; + case ExprType_Input: + strcat(acc, color_bright_gray); + strcat(acc, "Input"); + strcat(acc, color_reset); + break; + case ExprType_Loop: + strcat(acc, color_bright_red); + strcat(acc, "Loop"); + expr_vec_stringify(&expr->exprs, acc, depth); + strcat(acc, color_reset); + break; + case ExprType_Zero: + strcat(acc, color_yellow); + strcat(acc, "Zero"); + strcat(acc, color_reset); + break; + } +} + +bool expr_equal(const Expr* self, const Expr* other) +{ + if (self->type != other->type) { + return false; + } + switch (self->type) { + case ExprType_Incr: + case ExprType_Decr: + case ExprType_Left: + case ExprType_Right: + if (self->value != other->value) { + return false; + } + break; + case ExprType_Loop: + if (!expr_vec_equal(&self->exprs, &other->exprs)) { + return false; + } + break; + default: + break; + } + return true; +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..8b6c2ad --- /dev/null +++ b/expr.h @@ -0,0 +1,49 @@ +#ifndef EXPR_H +#define EXPR_H + +#include +#include + +typedef enum { + ExprType_Error, 
+ ExprType_Incr, + ExprType_Decr, + ExprType_Left, + ExprType_Right, + ExprType_Output, + ExprType_Input, + ExprType_Loop, + ExprType_Zero, +} ExprType; + +typedef struct Expr Expr; + +typedef struct ExprVec { + Expr* data; + size_t capacity; + size_t length; +} ExprVec; + +void expr_vec_construct(ExprVec* vec); +void expr_vec_destroy(ExprVec* vec); +void expr_vec_free(ExprVec* vec); +void expr_vec_push(ExprVec* vec, Expr expr); +Expr expr_vec_pop(ExprVec* vec); +bool expr_vec_equal(const ExprVec* self, const ExprVec* other); + +struct Expr { + ExprType type; + union { + int value; + ExprVec exprs; + }; +}; + +void expr_free(Expr* expr); +const char* expr_bracket_color(int depth); +void expr_stringify_concat_value(Expr* expr, char* acc, int depth); +void expr_vec_stringify(ExprVec* vec, char* acc, int depth); +void expr_stringify(Expr* expr, char* acc, int depth); +bool expr_equal(const Expr* self, const Expr* other); + +#endif diff --git a/main.c b/main.c index 51eb875..aac86a8 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,8 @@ +#include "emitter.h" +#include "expr.h" +#include "optimizer.h" +#include "parser.h" +#include "print.h" #include #include #include @@ -5,766 +10,17 @@ #include #include -typedef enum { - Token_Eof, - Token_Plus, - Token_Minus, - Token_LT, - Token_GT, - Token_Dot, - Token_Comma, - Token_LBracket, - Token_RBracket -} Token; - -typedef struct { - const char* text; - size_t index; - size_t length; -} Lexer; - -Lexer lexer_create(const char* text, size_t length) +int main(int argc, char** argv) { - return (Lexer) { .text = text, .index = 0, .length = length }; -} - -void lexer_step(Lexer* lexer) { lexer->index += 1; } - -Token lexer_next(Lexer* lexer) -{ - if (lexer->index >= lexer->length) { - return Token_Eof; - } - switch (lexer->text[lexer->index]) { - case '+': - return (lexer_step(lexer), Token_Plus); - case '-': - return (lexer_step(lexer), Token_Minus); - case '<': - return (lexer_step(lexer), Token_LT); - case '>': - return 
(lexer_step(lexer), Token_GT); - case '.': - return (lexer_step(lexer), Token_Dot); - case ',': - return (lexer_step(lexer), Token_Comma); - case '[': - return (lexer_step(lexer), Token_LBracket); - case ']': - return (lexer_step(lexer), Token_RBracket); - default: - return (lexer_step(lexer), lexer_next(lexer)); - } -} - -typedef enum { - ExprType_Error, - ExprType_Incr, - ExprType_Decr, - ExprType_Left, - ExprType_Right, - ExprType_Output, - ExprType_Input, - ExprType_Loop, - ExprType_Zero, -} ExprType; - -typedef struct Expr Expr; - -typedef struct ExprVec { - Expr* data; - size_t capacity; - size_t length; -} ExprVec; - -struct Expr { - ExprType type; - union { - int value; - ExprVec exprs; - }; -}; - -void expr_vec_construct(ExprVec* vec) -{ - *vec = (ExprVec) { - .data = malloc(sizeof(Expr) * 8), - .capacity = 8, - .length = 0, - }; -} -void expr_vec_destroy(ExprVec* vec) { free(vec->data); } - -void expr_free(Expr* expr); -void expr_vec_free(ExprVec* vec) -{ - for (size_t i = 0; i < vec->length; ++i) { - expr_free(&vec->data[i]); - } - expr_vec_destroy(vec); -} - -void expr_vec_push(ExprVec* vec, Expr expr) -{ - if (vec->length + 1 > vec->capacity) { - vec->capacity *= 2; - vec->data = realloc(vec->data, sizeof(Expr) * vec->capacity); - } - vec->data[vec->length] = expr; - vec->length += 1; -} -Expr expr_vec_pop(ExprVec* vec) -{ - vec->length -= 1; - return vec->data[vec->length]; -} - -void expr_free(Expr* expr) -{ - switch (expr->type) { - case ExprType_Loop: - expr_vec_free(&expr->exprs); - break; - default: - break; - } -} - -const char* color_reset = "\x1b[0m"; -const char* color_bold = "\x1b[1m"; - -const char* color_black = "\x1b[30m"; -const char* color_red = "\x1b[31m"; -const char* color_green = "\x1b[32m"; -const char* color_yellow = "\x1b[33m"; -const char* color_blue = "\x1b[34m"; -const char* color_magenta = "\x1b[35m"; -const char* color_cyan = "\x1b[36m"; -const char* color_bright_gray = "\x1b[37m"; - -const char* color_gray = "\x1b[90m"; 
-const char* color_bright_red = "\x1b[91m"; -const char* color_bright_green = "\x1b[92m"; -const char* color_bright_yellow = "\x1b[93m"; -const char* color_bright_blue = "\x1b[94m"; -const char* color_bright_magenta = "\x1b[95m"; -const char* color_bright_cyan = "\x1b[96m"; -const char* color_white = "\x1b[97m"; - -const char* expr_bracket_color(int depth) -{ - switch (depth % 3) { - case 0: - return color_bright_yellow; - case 1: - return color_magenta; - case 2: - return color_cyan; - } - return NULL; -} - -void expr_stringify_concat_value(Expr* expr, char* acc, int depth) -{ - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "("); - strcat(acc, color_reset); - char value[16] = { 0 }; - snprintf(value, 16, "%d", expr->value); - strcat(acc, value); - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, ")"); - strcat(acc, color_reset); -} - -void expr_stringify(Expr* expr, char* acc, int depth); - -void expr_vec_stringify(ExprVec* vec, char* acc, int depth) -{ - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "["); - strcat(acc, color_reset); - for (size_t i = 0; i < vec->length; ++i) { - if (i != 0) { - strcat(acc, " "); - } - expr_stringify(&vec->data[i], acc, depth + 1); - } - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "]"); - strcat(acc, color_reset); -} - -void expr_stringify(Expr* expr, char* acc, int depth) -{ - switch (expr->type) { - case ExprType_Error: - strcat(acc, color_bright_red); - strcat(acc, "Error"); - strcat(acc, color_reset); - break; - case ExprType_Incr: - strcat(acc, color_yellow); - strcat(acc, "Incr"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Decr: - strcat(acc, color_yellow); - strcat(acc, "Decr"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Left: - strcat(acc, color_green); - strcat(acc, 
"Left"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Right: - strcat(acc, color_green); - strcat(acc, "Right"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Output: - strcat(acc, color_bright_gray); - strcat(acc, "Output"); - strcat(acc, color_reset); - break; - case ExprType_Input: - strcat(acc, color_bright_gray); - strcat(acc, "Input"); - strcat(acc, color_reset); - break; - case ExprType_Loop: - strcat(acc, color_bright_red); - strcat(acc, "Loop"); - expr_vec_stringify(&expr->exprs, acc, depth); - strcat(acc, color_reset); - break; - case ExprType_Zero: - strcat(acc, color_yellow); - strcat(acc, "Zero"); - strcat(acc, color_reset); - break; - } -} - -bool expr_equal(const Expr* self, const Expr* other); - -bool expr_vec_equal(const ExprVec* self, const ExprVec* other) -{ - if (self->length != other->length) { - return false; - } - for (size_t i = 0; i < self->length; ++i) { - if (!expr_equal(&self->data[i], &other->data[i])) { - return false; - } - } - return true; -} - -bool expr_equal(const Expr* self, const Expr* other) -{ - if (self->type != other->type) { - return false; - } - switch (self->type) { - case ExprType_Incr: - case ExprType_Decr: - case ExprType_Left: - case ExprType_Right: - if (self->value != other->value) { - return false; - } - break; - case ExprType_Loop: - if (!expr_vec_equal(&self->exprs, &other->exprs)) { - return false; - } - break; - default: - break; - } - return true; -} - -typedef struct { - Lexer lexer; - Token current; -} Parser; - -Parser parser_create(const char* text, size_t length) -{ - Lexer lexer = lexer_create(text, length); - return (Parser) { - .lexer = lexer, - .current = lexer_next(&lexer), - }; -} - -void parser_step(Parser* parser) -{ - parser->current = lexer_next(&parser->lexer); -} - -Expr parser_parse_expr(Parser* parser); - -Expr parser_parse_loop(Parser* parser) -{ - parser_step(parser); - ExprVec 
exprs; - expr_vec_construct(&exprs); - while (parser->current != Token_Eof && parser->current != Token_RBracket) { - expr_vec_push(&exprs, parser_parse_expr(parser)); - } - if (parser->current != Token_RBracket) { - return (Expr) { .type = ExprType_Error }; - } - parser_step(parser); - return (Expr) { .type = ExprType_Loop, .exprs = exprs }; -} - -Expr parser_parse_expr(Parser* parser) -{ - switch (parser->current) { - case Token_Plus: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Incr, .value = 1 } - ); - case Token_Minus: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Decr, .value = 1 } - ); - case Token_LT: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Left, .value = 1 } - ); - case Token_GT: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Right, .value = 1 } - ); - case Token_Dot: - return (parser_step(parser), (Expr) { .type = ExprType_Output }); - case Token_Comma: - return (parser_step(parser), (Expr) { .type = ExprType_Input }); - case Token_LBracket: - return parser_parse_loop(parser); - default: - return (parser_step(parser), (Expr) { .type = ExprType_Error }); - } -} - -ExprVec parser_parse(Parser* parser) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - while (parser->current != Token_Eof) { - expr_vec_push(&exprs, parser_parse_expr(parser)); - } - return exprs; -} - -Expr expr_optimize_fold_adjecent(const Expr* expr); -ExprVec expr_vec_optimize_fold_adjecent(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - if (vec->length == 0) { - return exprs; - } - Expr a = expr_optimize_fold_adjecent(&vec->data[0]); - for (size_t i = 1; i < vec->length; ++i) { - Expr b = expr_optimize_fold_adjecent(&vec->data[i]); - switch (a.type) { - case ExprType_Incr: - case ExprType_Decr: - case ExprType_Left: - case ExprType_Right: - if (a.type != b.type) { - expr_vec_push(&exprs, a); - a = b; - } else { - a.value += b.value; - } - break; - default: - expr_vec_push(&exprs, a); - a = b; - } 
- } - expr_vec_push(&exprs, a); - return exprs; -} -Expr expr_optimize_fold_adjecent(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_fold_adjecent(&expr->exprs), - }; - } else { - return *expr; - } -} - -Expr expr_optimize_eliminate_negation(const Expr* expr); -ExprVec expr_vec_optimize_eliminate_negation(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - if (vec->length == 0) { - return exprs; - } - expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[0])); - for (size_t i = 1; i < vec->length; ++i) { - expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[i])); - Expr* a = &exprs.data[i - 1]; - Expr* b = &exprs.data[i]; - if (a->type == ExprType_Incr && b->type == ExprType_Decr) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Decr, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } else if (a->type == ExprType_Decr && b->type == ExprType_Incr) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Incr, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } - if (a->type == ExprType_Left && b->type == ExprType_Right) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Right, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } else if (a->type == ExprType_Right && b->type == ExprType_Left) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Left, - 
.value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } - } - return exprs; -} -Expr expr_optimize_eliminate_negation(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_eliminate_negation(&expr->exprs), - }; - } else { - return *expr; - } -} - -Expr expr_optimize_eliminate_overflow(const Expr* expr); -ExprVec expr_vec_optimize_eliminate_overflow(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - for (size_t i = 0; i < vec->length; ++i) { - expr_vec_push(&exprs, expr_optimize_eliminate_overflow(&vec->data[i])); - } - return exprs; -} -Expr expr_optimize_eliminate_overflow(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_eliminate_overflow(&expr->exprs), - }; - } else if (expr->value > 255) { - return (Expr) { .type = expr->type, .value = expr->value % 256 }; - } else { - return *expr; - } -} - -Expr expr_optimize_replace_zeroing_loops(const Expr* expr); -ExprVec expr_vec_optimize_replace_zeroing_loops(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - for (size_t i = 0; i < vec->length; ++i) { - expr_vec_push( - &exprs, expr_optimize_replace_zeroing_loops(&vec->data[i]) - ); - } - return exprs; -} -Expr expr_optimize_replace_zeroing_loops(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - if (expr->exprs.length == 1 - && (expr->exprs.data[0].type == ExprType_Incr - || expr->exprs.data[0].type == ExprType_Decr) - && expr->exprs.data[0].value % 2 != 0) { - return (Expr) { .type = ExprType_Zero }; - } else { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_replace_zeroing_loops(&expr->exprs), - }; - } - } else { - return *expr; - } -} - -typedef struct { - uint8_t* code; - size_t pos; - int loop_counter; - bool cmp_flags_set; -} Emitter; - -Emitter emitter_create(uint8_t* 
code_address) -{ - return (Emitter) { - .code = code_address, - .pos = 0, - .loop_counter = 0, - .cmp_flags_set = false, - }; -} - -void emitter_push_u8(Emitter* emitter, uint8_t value) -{ - emitter->code[emitter->pos] = value; - emitter->pos += 1; -} - -void emitter_push_u32(Emitter* emitter, uint32_t value) -{ - emitter->code[emitter->pos] = value & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 8) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 16) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = value >> 24; - emitter->pos += 1; -} - -void emitter_push_u64(Emitter* emitter, uint64_t value) -{ - emitter->code[emitter->pos] = value & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 8) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 16) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 24) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 32) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 40) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 48) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 56) & 0xFF; - emitter->pos += 1; -} - -uint8_t get_char(void) { return (uint8_t)fgetc(stdin); } -void put_char(uint8_t v) { fputc(v, stdout); } - -void emitter_emit_expr(Emitter* emitter, Expr* expr) -{ - emitter->cmp_flags_set = false; - switch (expr->type) { - case ExprType_Error: - fprintf(stderr, "panic: emitter: program contained errors\n"); - exit(1); - break; - case ExprType_Incr: - // add BYTE [rbx], 1 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x03); - emitter_push_u8(emitter, expr->value); - emitter->cmp_flags_set = true; - break; - case ExprType_Decr: - // sub BYTE [rbx], 1 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x2b); - emitter_push_u8(emitter, expr->value); - emitter->cmp_flags_set = true; - break; - case 
ExprType_Left: - // sub rbx, 1 - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x83); - emitter_push_u8(emitter, 0xeb); - emitter_push_u8(emitter, expr->value); - break; - case ExprType_Right: - // add rbx, 1 - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x83); - emitter_push_u8(emitter, 0xc3); - emitter_push_u8(emitter, expr->value); - break; - case ExprType_Output: - // movzx edi, BYTE [rbx] - emitter_push_u8(emitter, 0x0f); - emitter_push_u8(emitter, 0xb6); - emitter_push_u8(emitter, 0x3b); - // movabs rax, - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0xb8); - emitter_push_u64(emitter, (uint64_t)put_char); - // call rax - emitter_push_u8(emitter, 0xff); - emitter_push_u8(emitter, 0xd0); - break; - case ExprType_Input: - // movabs rax, - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0xb8); - emitter_push_u64(emitter, (uint64_t)get_char); - // call rax - emitter_push_u8(emitter, 0xff); - emitter_push_u8(emitter, 0xd0); - // mov BYTE [rbx], al - emitter_push_u8(emitter, 0x88); - emitter_push_u8(emitter, 0x03); - break; - case ExprType_Loop: - fprintf(stderr, "panic: emitter: unexpected loop\n"); - exit(1); - break; - case ExprType_Zero: - // mov BYTE [rbx], 0 - emitter_push_u8(emitter, 0xc6); - emitter_push_u8(emitter, 0x03); - emitter_push_u8(emitter, 0x00); - emitter->cmp_flags_set = true; - break; - } -} - -void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec); - -void emitter_emit_loop(Emitter* emitter, Expr* expr) -{ - int64_t start_loc = (int64_t)&emitter->code[emitter->pos]; - emitter_emit_expr_vec(emitter, &expr->exprs); - if (!emitter->cmp_flags_set) { - // cmp BYTE [rbx], 0 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x3b); - emitter_push_u8(emitter, 0x00); - } - - int64_t current_loc = (int64_t)&emitter->code[emitter->pos]; - int32_t relative_address = -(int32_t)(current_loc - start_loc); - if (relative_address >= -127) { - // jne - emitter_push_u8(emitter, 0x75); - 
emitter_push_u8(emitter, (uint8_t)relative_address - 2); - } else { - // jne - emitter_push_u8(emitter, 0x0f); - emitter_push_u8(emitter, 0x85); - emitter_push_u32(emitter, (uint32_t)relative_address - 6); - } -} - -void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec) -{ - for (size_t i = 0; i < vec->length; ++i) { - Expr* expr = &vec->data[i]; - if (expr->type == ExprType_Loop) { - emitter_emit_loop(emitter, expr); - } else { - emitter_emit_expr(emitter, expr); - } - } -} - -void emitter_emit_program(Emitter* emitter, ExprVec* program) -{ - // push rbp: - emitter_push_u8(emitter, 0x55); - // mov rbp, rsp - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x89); - emitter_push_u8(emitter, 0xe5); - // push rbx: - emitter_push_u8(emitter, 0x53); - // mov rbx, rdi - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x89); - emitter_push_u8(emitter, 0xfb); - - emitter_emit_expr_vec(emitter, program); - - // pop rbx - emitter_push_u8(emitter, 0x5b); - // pop rbx - emitter_push_u8(emitter, 0x5d); - // ret - emitter_push_u8(emitter, 0xc3); -} - -int main(void) -{ - // const char* text - // = "+++[>[-]++++++++++++++++++++++++++++++++++++++++++++++++++" - // "++++++++++++++++++" - // "++++.---.++++++++++++++++++++.[-]++++++++++.<-]"; - const char* text = ">++[<+++++++++++++>-]<[[>+>+<<-]>[<+>-]++++++++" - "[>++++++++<-]>.[-]<<>++++++++++[>++++++++++[>++" - "++++++++[>++++++++++[>++++++++++[>++++++++++[>+" - "+++++++++[-]<-]<-]<-]<-]<-]<-]<-]++++++++++."; - + const char* text = "++++++++++[>+<-]"; printf("\ntext:%s\n\"%s\"%s\n", color_bright_green, text, color_reset); + Parser parser = parser_create(lexer_from_string(text, strlen(text))); + + // Parser parser = parser_create(lexer_from_args_or_stdin(argc, argv)); char* ast_string = malloc(sizeof(char) * 33768); ast_string[0] = '\0'; - Parser parser = parser_create(text, strlen(text)); ExprVec ast = parser_parse(&parser); { expr_vec_stringify(&ast, ast_string, 0); @@ -787,7 +43,7 @@ int main(void) 
expr_vec_free(&previous_ast); } previous_ast = ast; - ast = expr_vec_optimize_fold_adjecent(&ast); + ast = optimize_fold_adjecent(&ast); printf("%sfold_adjecent:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -797,7 +53,7 @@ int main(void) expr_vec_free(&previous_ast); previous_ast = ast; - ast = expr_vec_optimize_eliminate_negation(&ast); + ast = optimize_eliminate_negation(&ast); printf("%seliminate_negation:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -807,7 +63,7 @@ int main(void) expr_vec_free(&previous_ast); previous_ast = ast; - ast = expr_vec_optimize_eliminate_overflow(&ast); + ast = optimize_eliminate_overflow(&ast); printf("%seliminate_overflow:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -817,7 +73,7 @@ int main(void) expr_vec_free(&previous_ast); previous_ast = ast; - ast = expr_vec_optimize_replace_zeroing_loops(&ast); + ast = optimize_replace_zeroing_loops(&ast); printf("%sreplace_zeroing_loops:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -835,7 +91,7 @@ int main(void) printf("\n%sfinal:%s\n%s\n", color_bold, color_reset, ast_string); size_t code_size = 33678; - uint8_t* code = mmap( + void* code = mmap( NULL, code_size, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -854,13 +110,11 @@ int main(void) uint8_t* memory = malloc(30000); memset(memory, 0, 30000); - void (*runnable)(uint8_t* memory) = (void (*)(uint8_t* memory))(void*)code; - printf("\n%scode:%s\n", color_bold, color_reset); for (size_t y = 0; y < 8; ++y) { for (size_t x = 0; x < 8; ++x) { - uint8_t v = code[y * 8 + x]; + uint8_t v = ((uint8_t*)code)[y * 8 + x]; if (v == 0) { fputs(color_gray, stdout); } @@ -872,6 +126,11 @@ int main(void) printf("\n%sresult:%s\n", color_bold, color_reset); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" + void 
(*runnable)(uint8_t* memory) = (void (*)(uint8_t* memory))code; +#pragma GCC diagnostic pop + runnable(memory); printf("\n%smemory:%s\n", color_bold, color_reset);
diff --git a/optimizer.c b/optimizer.c
new file mode 100644
index 0000000..d8e9804
--- /dev/null
+++ b/optimizer.c
@@ -0,0 +1,222 @@
+#include "optimizer.h"
+#include "expr.h"
+
+/*
+ * fold adjecent
+ *
+ * A(N) :: { Incr(N) | Decr(N) | Right(N) | Left(N) }
+ *
+ * [A(N1) A(N2)] -> [A(N1 + N2)]
+ *
+ */
+
+/*
+ * Returns a new vector in which each run of consecutive expressions of the
+ * same foldable type (Incr, Decr, Left, Right) is merged into one expression
+ * whose value is the sum of the run. Every element first goes through
+ * expr_optimize_fold_adjecent(), so loop bodies are folded as well.
+ * An empty `vec` yields an empty vector.
+ */
+ExprVec optimize_fold_adjecent(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    if (vec->length == 0) {
+        return exprs;
+    }
+    // `a` accumulates the current run; it is pushed once the run ends.
+    Expr a = expr_optimize_fold_adjecent(&vec->data[0]);
+    for (size_t i = 1; i < vec->length; ++i) {
+        Expr b = expr_optimize_fold_adjecent(&vec->data[i]);
+        switch (a.type) {
+            case ExprType_Incr:
+            case ExprType_Decr:
+            case ExprType_Left:
+            case ExprType_Right:
+                if (a.type != b.type) {
+                    expr_vec_push(&exprs, a);
+                    a = b;
+                } else {
+                    // Same foldable type: extend the run instead of pushing.
+                    a.value += b.value;
+                }
+                break;
+            default:
+                // Any other type is never merged with its successor.
+                expr_vec_push(&exprs, a);
+                a = b;
+        }
+    }
+    // The last run is still pending when the loop ends.
+    expr_vec_push(&exprs, a);
+    return exprs;
+}
+
+/*
+ * Folds a single expression: a Loop is rebuilt with its body folded by
+ * optimize_fold_adjecent(); any other expression is returned as a copy.
+ */
+Expr expr_optimize_fold_adjecent(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = optimize_fold_adjecent(&expr->exprs),
+        };
+    } else {
+        return *expr;
+    }
+}
+
+/*
+ * eliminate negation
+ *
+ * A(N), B(N) :: { Incr(N) | Decr(N) | Right(N) | Left(N) }
+ *
+ * [A(N) B(N)] = []
+ *
+ * [A(N1) B(N2)] ? N1 == N2 -> []
+ * [A(N1) B(N2)] ? N1 < N2 -> [B(N2 - N1)]
+ * [A(N1) B(N2)] ? 
N1 > N2 -> [A(N1 - N2)]
+ *
+ */
+
+/*
+ * Non-zero when `a` and `b` undo each other, i.e. their types form one of the
+ * pairs Incr/Decr or Left/Right, in either order.
+ */
+static int expr_negates(const Expr* a, const Expr* b)
+{
+    return (a->type == ExprType_Incr && b->type == ExprType_Decr)
+        || (a->type == ExprType_Decr && b->type == ExprType_Incr)
+        || (a->type == ExprType_Left && b->type == ExprType_Right)
+        || (a->type == ExprType_Right && b->type == ExprType_Left);
+}
+
+/*
+ * Returns a new vector built from `vec` in which neighbouring inverse
+ * operations are merged according to the rules above. Every element first
+ * goes through expr_optimize_eliminate_negation(), so loop bodies are
+ * rewritten as well. An empty `vec` yields an empty vector.
+ *
+ * Examples:
+ *   [Incr(3) Decr(1)]  -> [Incr(2)]
+ *   [Left(1) Right(4)] -> [Right(3)]
+ *   [Incr(2) Decr(2)]  -> []
+ *
+ * The pair under inspection is always the last two entries of the output,
+ * never `data[i - 1]` / `data[i]`: once a pair has been merged or dropped the
+ * output is shorter than the input, so input indices would address popped or
+ * out-of-range slots.
+ *
+ * Because the output is used as a stack, a cancellation can expose a new
+ * pair: [Incr(2) Right(1) Left(1) Decr(2)] -> [].
+ */
+ExprVec optimize_eliminate_negation(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[i]));
+        if (exprs.length < 2) {
+            continue;
+        }
+        // NOTE(review): pointers are taken after the push on the assumption
+        // that expr_vec_push() may relocate `data` when it grows.
+        Expr* a = &exprs.data[exprs.length - 2];
+        Expr* b = &exprs.data[exprs.length - 1];
+        if (!expr_negates(a, b)) {
+            continue;
+        }
+        if (a->value > b->value) {
+            // A outweighs B: keep A with the difference.
+            a->value -= b->value;
+            expr_vec_pop(&exprs);
+        } else if (a->value < b->value) {
+            // B outweighs A: the remainder acts in B's direction.
+            *a = (Expr) { .type = b->type, .value = b->value - a->value };
+            expr_vec_pop(&exprs);
+        } else {
+            // Equal amounts cancel completely: drop both.
+            expr_vec_pop(&exprs);
+            expr_vec_pop(&exprs);
+        }
+    }
+    return exprs;
+}
+
+Expr expr_optimize_eliminate_negation(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = 
optimize_eliminate_negation(&expr->exprs),
+        };
+    } else {
+        return *expr;
+    }
+}
+
+/*
+ * eliminate overflow
+ *
+ * A(N) :: { Incr(N) | Decr(N) }
+ *
+ * N > 255
+ *
+ * A(N) -> A(N % 256)
+ *
+ * Cells are bytes, so Incr/Decr amounts wrap at 256. Left/Right move the data
+ * pointer, which does not wrap at 256 (Right(256) is not a no-op), so they
+ * are copied unchanged.
+ */
+
+/*
+ * Returns a new vector with expr_optimize_eliminate_overflow() applied to
+ * every element of `vec`.
+ */
+ExprVec optimize_eliminate_overflow(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(&exprs, expr_optimize_eliminate_overflow(&vec->data[i]));
+    }
+    return exprs;
+}
+
+/*
+ * Rewrites a single expression: a Loop is rebuilt with its body processed
+ * recursively, an Incr/Decr above 255 is reduced modulo 256, and anything
+ * else is returned as a copy.
+ */
+Expr expr_optimize_eliminate_overflow(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = optimize_eliminate_overflow(&expr->exprs),
+        };
+    }
+    // Only cell arithmetic wraps at 256; a pointer move of 256 or more is a
+    // real displacement and must keep its size.
+    if ((expr->type == ExprType_Incr || expr->type == ExprType_Decr)
+        && expr->value > 255) {
+        return (Expr) { .type = expr->type, .value = expr->value % 256 };
+    }
+    return *expr;
+}
+
+/*
+ * replace zeroing loops
+ *
+ * A(N) :: { Incr(N) | Decr(N) }
+ *
+ * N % 2 == 1
+ *
+ * Loop[A(N)] -> Zero
+ *
+ * An odd step is coprime with 256, so a wrapping byte cell is guaranteed to
+ * reach 0. An even step may never reach 0, so such loops are kept as loops.
+ */
+
+/*
+ * Returns a new vector with expr_optimize_replace_zeroing_loops() applied to
+ * every element of `vec`.
+ */
+ExprVec optimize_replace_zeroing_loops(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(
+            &exprs, expr_optimize_replace_zeroing_loops(&vec->data[i])
+        );
+    }
+    return exprs;
+}
+
+/*
+ * Rewrites a single expression: a Loop whose whole body is one Incr or Decr
+ * with an odd value becomes Zero, any other Loop is rebuilt with its body
+ * processed recursively, and a non-loop is returned as a copy.
+ */
+Expr expr_optimize_replace_zeroing_loops(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        if (expr->exprs.length == 1
+            && (expr->exprs.data[0].type == ExprType_Incr
+                || expr->exprs.data[0].type == ExprType_Decr)
+            && expr->exprs.data[0].value % 2 != 0) {
+            return (Expr) { .type = ExprType_Zero };
+        } else {
+            return (Expr) {
+                .type = ExprType_Loop,
+                .exprs = optimize_replace_zeroing_loops(&expr->exprs),
+            };
+        }
+    } else {
+        return *expr;
+    }
+}
diff --git a/optimizer.h b/optimizer.h
new file mode 100644
index 0000000..6ed7c65
--- /dev/null
+++ b/optimizer.h
@@ -0,0 +1,18 @@
+#ifndef OPTIMIZER_H
+#define OPTIMIZER_H
+
+#include "expr.h"
+
+ExprVec optimize_fold_adjecent(const ExprVec* vec);
+Expr expr_optimize_fold_adjecent(const Expr* expr);
+
+ExprVec 
optimize_eliminate_negation(const ExprVec* vec);
+Expr expr_optimize_eliminate_negation(const Expr* expr);
+
+ExprVec optimize_eliminate_overflow(const ExprVec* vec);
+Expr expr_optimize_eliminate_overflow(const Expr* expr);
+
+ExprVec optimize_replace_zeroing_loops(const ExprVec* vec);
+Expr expr_optimize_replace_zeroing_loops(const Expr* expr);
+
+#endif
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..ea31e87
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,169 @@
+#include "parser.h"
+#include <stdlib.h>
+
+/*
+ * Creates a lexer over the first `length` characters of `text`. The buffer is
+ * borrowed, not copied, so it must stay valid while the lexer is in use.
+ * `current` starts at the first character, or '\0' when `length` is 0.
+ */
+Lexer lexer_from_string(const char* text, size_t length)
+{
+    Lexer lexer = (Lexer) {
+        .type = LexerType_String,
+        // Do not touch `text` at all when there is nothing to read.
+        .current = length > 0 ? text[0] : '\0',
+        .string = {
+            .text = text,
+            .index = 0,
+            .length = length,
+        },
+    };
+    return lexer;
+}
+
+/*
+ * Creates a lexer that reads from `file` and loads its first character.
+ */
+Lexer lexer_from_file(FILE* file)
+{
+    Lexer lexer = (Lexer) {
+        .type = LexerType_File,
+        // Any non-NUL value: lexer_done() treats '\0' as end of file, and
+        // lexer_step() refuses to read once the lexer is done.
+        .current = 1,
+        .file = file,
+    };
+    lexer_step(&lexer);
+    return lexer;
+}
+
+/*
+ * Creates a file lexer over `argv[1]` when an argument is given, otherwise
+ * over stdin. Prints a message and exits with status 1 if the file cannot be
+ * opened.
+ * NOTE(review): nothing visible here closes the opened file.
+ */
+Lexer lexer_from_args_or_stdin(int argc, char** argv)
+{
+    if (argc >= 2) {
+        FILE* file = fopen(argv[1], "r");
+        if (!file) {
+            fprintf(stderr, "panic: could not open file \"%s\"\n", argv[1]);
+            exit(1);
+        }
+        return lexer_from_file(file);
+    } else {
+        return lexer_from_file(stdin);
+    }
+}
+
+/*
+ * True once the source is exhausted: the index has reached `length` for a
+ * string lexer, or `current` is '\0' for a file lexer (lexer_step() stores
+ * '\0' at EOF, so a NUL byte in the file also ends lexing).
+ */
+bool lexer_done(Lexer* lexer)
+{
+    switch (lexer->type) {
+        case LexerType_String:
+            return lexer->string.index >= lexer->string.length;
+        case LexerType_File:
+            return lexer->current == '\0';
+    }
+    // Only reached when `type` holds neither enumerator.
+    exit(1);
+}
+
+/*
+ * Advances to the next character of the source. Does nothing once the lexer
+ * is done.
+ */
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer)) {
+        return;
+    }
+    switch (lexer->type) {
+        case LexerType_String:
+            lexer->string.index += 1;
+            // Never read past `length`: the text need not be NUL-terminated.
+            lexer->current = lexer->string.index < lexer->string.length
+                ? lexer->string.text[lexer->string.index]
+                : '\0';
+            break;
+        case LexerType_File: {
+            int c = fgetc(lexer->file);
+            lexer->current = c != EOF ? 
(char)c : '\0';
+        } break;
+    }
+}
+
+/*
+ * Returns the next token, skipping every character that is not one of the
+ * eight command characters. Returns Token_Eof once the source is exhausted.
+ */
+Token lexer_next(Lexer* lexer)
+{
+    // A loop rather than recursion: a long run of skipped characters must not
+    // grow the call stack.
+    while (!lexer_done(lexer)) {
+        char c = lexer->current;
+        lexer_step(lexer);
+        switch (c) {
+            case '+':
+                return Token_Plus;
+            case '-':
+                return Token_Minus;
+            case '<':
+                return Token_LT;
+            case '>':
+                return Token_GT;
+            case '.':
+                return Token_Dot;
+            case ',':
+                return Token_Comma;
+            case '[':
+                return Token_LBracket;
+            case ']':
+                return Token_RBracket;
+            default:
+                break;
+        }
+    }
+    return Token_Eof;
+}
+
+/*
+ * Creates a parser over its own copy of `lexer`, with the first token already
+ * loaded into `current`.
+ */
+Parser parser_create(Lexer lexer)
+{
+    // Read the first token before the lexer is copied into the result. The
+    // expressions of an initializer list are indeterminately sequenced, so
+    // doing both inside the list could store a lexer that has not advanced
+    // and make the first token appear twice.
+    Token first = lexer_next(&lexer);
+    return (Parser) {
+        .lexer = lexer,
+        .current = first,
+    };
+}
+
+/*
+ * Replaces `current` with the next token from the parser's lexer.
+ */
+void parser_step(Parser* parser)
+{
+    parser->current = lexer_next(&parser->lexer);
+}
+Expr parser_parse_expr(Parser* parser);
+
+/*
+ * Parses a loop. Expects `current` to be the opening bracket, consumes
+ * expressions up to the matching closing bracket and returns a Loop holding
+ * them. Returns an Error expression when the input ends before the loop is
+ * closed.
+ */
+Expr parser_parse_loop(Parser* parser)
+{
+    parser_step(parser);
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current != Token_Eof && parser->current != Token_RBracket) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    if (parser->current != Token_RBracket) {
+        // The body is not handed to anyone on this path; release it here.
+        expr_vec_free(&exprs);
+        return (Expr) { .type = ExprType_Error };
+    }
+    parser_step(parser);
+    return (Expr) { .type = ExprType_Loop, .exprs = exprs };
+}
+
+/*
+ * Parses one expression starting at `current` and advances past it. Each
+ * command token becomes the matching expression (value 1 for Incr, Decr,
+ * Left and Right), an opening bracket starts a loop, and any other token is
+ * consumed and reported as an Error expression.
+ */
+Expr parser_parse_expr(Parser* parser)
+{
+    switch (parser->current) {
+        case Token_Plus:
+            return (
+                parser_step(parser),
+                (Expr) { .type = ExprType_Incr, .value = 1 }
+            );
+        case Token_Minus:
+            return (
+                parser_step(parser),
+                (Expr) { .type = ExprType_Decr, .value = 1 }
+            );
+        case Token_LT:
+            return (
+                parser_step(parser),
+                (Expr) { .type = ExprType_Left, .value = 1 }
+            );
+        case Token_GT:
+            return (
+                parser_step(parser),
+                (Expr) { .type = ExprType_Right, .value = 1 }
+            );
+        case Token_Dot:
+            return (parser_step(parser), (Expr) { .type = ExprType_Output });
+        case Token_Comma:
+            return (parser_step(parser), (Expr) { .type = ExprType_Input });
+        
case Token_LBracket:
+            return parser_parse_loop(parser);
+        default:
+            return (parser_step(parser), (Expr) { .type = ExprType_Error });
+    }
+}
+
+/*
+ * Parses the whole token stream: collects expressions until Token_Eof and
+ * returns them as a new vector. Malformed input shows up as Error
+ * expressions inside the result rather than as a failure here.
+ */
+ExprVec parser_parse(Parser* parser)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current != Token_Eof) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    return exprs;
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..ff820b2
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,60 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "expr.h"
+#include <stdbool.h>
+#include <stdio.h>
+
+// Lexical tokens: one per command character, plus end of input.
+typedef enum {
+    Token_Eof,
+    Token_Plus,
+    Token_Minus,
+    Token_LT,
+    Token_GT,
+    Token_Dot,
+    Token_Comma,
+    Token_LBracket,
+    Token_RBracket
+} Token;
+
+// Selects which member of the Lexer union is in use.
+typedef enum {
+    LexerType_String,
+    LexerType_File,
+} LexerType;
+
+// Character source for the parser: an in-memory buffer or a stdio stream.
+typedef struct {
+    LexerType type;
+    char current; // character most recently read from the source
+    union {
+        struct {
+            const char* text; // borrowed, not copied
+            size_t index;
+            size_t length;
+        } string;
+        FILE* file;
+    };
+} Lexer;
+
+Lexer lexer_from_string(const char* text, size_t length);
+Lexer lexer_from_file(FILE* file);
+Lexer lexer_from_args_or_stdin(int argc, char** argv);
+bool lexer_done(Lexer* lexer);
+void lexer_step(Lexer* lexer);
+Token lexer_next(Lexer* lexer);
+
+// Parser state: the lexer plus one token of lookahead.
+typedef struct {
+    Lexer lexer;
+    Token current;
+} Parser;
+
+Parser parser_create(Lexer lexer);
+void parser_step(Parser* parser);
+Expr parser_parse_loop(Parser* parser);
+Expr parser_parse_expr(Parser* parser);
+ExprVec parser_parse(Parser* parser);
+
+#endif
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..46151a2
--- /dev/null
+++ b/print.h
@@ -0,0 +1,27 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+#define MAYBE_UNUSED __attribute__((unused))
+
+MAYBE_UNUSED static const char* color_reset = "\x1b[0m";
+MAYBE_UNUSED static const char* color_bold = "\x1b[1m";
+
+MAYBE_UNUSED static const char* color_black = "\x1b[30m";
+MAYBE_UNUSED static const char* color_red = "\x1b[31m";
+MAYBE_UNUSED static const char* color_green = "\x1b[32m";
+MAYBE_UNUSED static const char* color_yellow = 
"\x1b[33m";
+MAYBE_UNUSED static const char* color_blue = "\x1b[34m";
+MAYBE_UNUSED static const char* color_magenta = "\x1b[35m";
+MAYBE_UNUSED static const char* color_cyan = "\x1b[36m";
+MAYBE_UNUSED static const char* color_bright_gray = "\x1b[37m";
+
+MAYBE_UNUSED static const char* color_gray = "\x1b[90m";
+MAYBE_UNUSED static const char* color_bright_red = "\x1b[91m";
+MAYBE_UNUSED static const char* color_bright_green = "\x1b[92m";
+MAYBE_UNUSED static const char* color_bright_yellow = "\x1b[93m";
+MAYBE_UNUSED static const char* color_bright_blue = "\x1b[94m";
+MAYBE_UNUSED static const char* color_bright_magenta = "\x1b[95m";
+MAYBE_UNUSED static const char* color_bright_cyan = "\x1b[96m";
+MAYBE_UNUSED static const char* color_white = "\x1b[97m";
+
+#endif
diff --git a/runtime.c b/runtime.c
new file mode 100644
index 0000000..a04ce90
--- /dev/null
+++ b/runtime.c
@@ -0,0 +1,11 @@
+#include "runtime.h"
+#include <stdint.h>
+#include <stdio.h>
+
+// Reads one byte from stdin. At end of input fgetc() returns EOF, which the
+// cast truncates to a byte (255 where EOF is -1), so the caller cannot tell
+// end of input from a real 0xFF byte.
+uint8_t get_char(void) { return (uint8_t)fgetc(stdin); }
+
+// Writes one byte to stdout. The result of fputc() is not checked.
+void put_char(uint8_t v) { fputc(v, stdout); }
diff --git a/runtime.h b/runtime.h
new file mode 100644
index 0000000..5ad0369
--- /dev/null
+++ b/runtime.h
@@ -0,0 +1,10 @@
+#ifndef RUNTIME_H
+#define RUNTIME_H
+
+#include <stdint.h>
+
+// Byte-wise I/O helpers on stdin/stdout.
+uint8_t get_char(void);
+void put_char(uint8_t v);
+
+#endif