diff --git a/build.sh b/build.sh index 82751fd..75df40e 100644 --- a/build.sh +++ b/build.sh @@ -1,4 +1,4 @@ #!/bin/bash -gcc -o program main.c -std=c17 -Wall -Wextra -Wpedantic -O3 +gcc -o program -std=c17 -Wall -Wextra -Wpedantic -O3 `cat compile_files.txt` diff --git a/compile_files.txt b/compile_files.txt new file mode 100644 index 0000000..714ecba --- /dev/null +++ b/compile_files.txt @@ -0,0 +1,7 @@ +main.c +expr.c +parser.c +optimizer.c +emitter.c +runtime.c + diff --git a/debug.sh b/debug.sh index 7550f41..8f072b2 100644 --- a/debug.sh +++ b/debug.sh @@ -2,7 +2,7 @@ set -xe -gcc -o program main.c -std=c17 -Wall -Wextra -Wpedantic -O3 -g -fsanitize=address,undefined +gcc -o program -std=c17 -Wall -Wextra -Wpedantic -O3 -g -fsanitize=address,undefined `cat compile_files.txt` ./program diff --git a/emitter.c b/emitter.c new file mode 100644 index 0000000..7bcae50 --- /dev/null +++ b/emitter.c @@ -0,0 +1,190 @@ +#include "emitter.h" +#include "expr.h" +#include "runtime.h" +#include +#include + +Emitter emitter_create(uint8_t* code_address) +{ + return (Emitter) { + .code = code_address, + .pos = 0, + .loop_counter = 0, + .cmp_flags_set = false, + }; +} + +void emitter_push_u8(Emitter* emitter, uint8_t value) +{ + emitter->code[emitter->pos] = value; + emitter->pos += 1; +} + +void emitter_push_u32(Emitter* emitter, uint32_t value) +{ + emitter->code[emitter->pos] = value & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 8) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 16) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = value >> 24; + emitter->pos += 1; +} + +void emitter_push_u64(Emitter* emitter, uint64_t value) +{ + emitter->code[emitter->pos] = value & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 8) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 16) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 24) & 0xFF; + emitter->pos += 1; 
+ emitter->code[emitter->pos] = (value >> 32) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 40) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 48) & 0xFF; + emitter->pos += 1; + emitter->code[emitter->pos] = (value >> 56) & 0xFF; + emitter->pos += 1; +} + +void emitter_emit_expr(Emitter* emitter, Expr* expr) +{ + emitter->cmp_flags_set = false; + switch (expr->type) { + case ExprType_Error: + fprintf(stderr, "panic: emitter: program contained errors\n"); + exit(1); + break; + case ExprType_Incr: + // add BYTE [rbx], 1 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x03); + emitter_push_u8(emitter, expr->value); + emitter->cmp_flags_set = true; + break; + case ExprType_Decr: + // sub BYTE [rbx], 1 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x2b); + emitter_push_u8(emitter, expr->value); + emitter->cmp_flags_set = true; + break; + case ExprType_Left: + // sub rbx, 1 + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x83); + emitter_push_u8(emitter, 0xeb); + emitter_push_u8(emitter, expr->value); + break; + case ExprType_Right: + // add rbx, 1 + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x83); + emitter_push_u8(emitter, 0xc3); + emitter_push_u8(emitter, expr->value); + break; + case ExprType_Output: + // movzx edi, BYTE [rbx] + emitter_push_u8(emitter, 0x0f); + emitter_push_u8(emitter, 0xb6); + emitter_push_u8(emitter, 0x3b); + // movabs rax, + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0xb8); + emitter_push_u64(emitter, (uint64_t)put_char); + // call rax + emitter_push_u8(emitter, 0xff); + emitter_push_u8(emitter, 0xd0); + break; + case ExprType_Input: + // movabs rax, + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0xb8); + emitter_push_u64(emitter, (uint64_t)get_char); + // call rax + emitter_push_u8(emitter, 0xff); + emitter_push_u8(emitter, 0xd0); + // mov BYTE [rbx], al + emitter_push_u8(emitter, 0x88); + 
emitter_push_u8(emitter, 0x03); + break; + case ExprType_Loop: + fprintf(stderr, "panic: emitter: unexpected loop\n"); + exit(1); + break; + case ExprType_Zero: + // mov BYTE [rbx], 0 + emitter_push_u8(emitter, 0xc6); + emitter_push_u8(emitter, 0x03); + emitter_push_u8(emitter, 0x00); + emitter->cmp_flags_set = true; + break; + } +} + +void emitter_emit_loop(Emitter* emitter, Expr* expr) +{ + int64_t start_loc = (int64_t)&emitter->code[emitter->pos]; + emitter_emit_expr_vec(emitter, &expr->exprs); + if (!emitter->cmp_flags_set) { + // cmp BYTE [rbx], 0 + emitter_push_u8(emitter, 0x80); + emitter_push_u8(emitter, 0x3b); + emitter_push_u8(emitter, 0x00); + } + + int64_t current_loc = (int64_t)&emitter->code[emitter->pos]; + int32_t relative_address = -(int32_t)(current_loc - start_loc); + if (relative_address >= -127) { + // jne + emitter_push_u8(emitter, 0x75); + emitter_push_u8(emitter, (uint8_t)relative_address - 2); + } else { + // jne + emitter_push_u8(emitter, 0x0f); + emitter_push_u8(emitter, 0x85); + emitter_push_u32(emitter, (uint32_t)relative_address - 6); + } +} + +void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec) +{ + for (size_t i = 0; i < vec->length; ++i) { + Expr* expr = &vec->data[i]; + if (expr->type == ExprType_Loop) { + emitter_emit_loop(emitter, expr); + } else { + emitter_emit_expr(emitter, expr); + } + } +} + +void emitter_emit_program(Emitter* emitter, ExprVec* program) +{ + // push rbp: + emitter_push_u8(emitter, 0x55); + // mov rbp, rsp + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x89); + emitter_push_u8(emitter, 0xe5); + // push rbx: + emitter_push_u8(emitter, 0x53); + // mov rbx, rdi + emitter_push_u8(emitter, 0x48); + emitter_push_u8(emitter, 0x89); + emitter_push_u8(emitter, 0xfb); + + emitter_emit_expr_vec(emitter, program); + + // pop rbx + emitter_push_u8(emitter, 0x5b); + // pop rbx + emitter_push_u8(emitter, 0x5d); + // ret + emitter_push_u8(emitter, 0xc3); +} diff --git a/emitter.h b/emitter.h new file 
mode 100644 index 0000000..72111dd --- /dev/null +++ b/emitter.h @@ -0,0 +1,25 @@ +#ifndef EMITTER_H +#define EMITTER_H + +#include "expr.h" +#include +#include +#include + +typedef struct { + uint8_t* code; + size_t pos; + int loop_counter; + bool cmp_flags_set; +} Emitter; + +Emitter emitter_create(uint8_t* code_address); +void emitter_push_u8(Emitter* emitter, uint8_t value); +void emitter_push_u32(Emitter* emitter, uint32_t value); +void emitter_push_u64(Emitter* emitter, uint64_t value); +void emitter_emit_expr(Emitter* emitter, Expr* expr); +void emitter_emit_loop(Emitter* emitter, Expr* expr); +void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec); +void emitter_emit_program(Emitter* emitter, ExprVec* program); + +#endif diff --git a/expr.c b/expr.c new file mode 100644 index 0000000..050d2d7 --- /dev/null +++ b/expr.c @@ -0,0 +1,192 @@ +#include "expr.h" +#include "print.h" +#include +#include +#include +#include + +void expr_vec_construct(ExprVec* vec) +{ + *vec = (ExprVec) { + .data = malloc(sizeof(Expr) * 8), + .capacity = 8, + .length = 0, + }; +} + +void expr_vec_destroy(ExprVec* vec) { free(vec->data); } + +void expr_vec_free(ExprVec* vec) +{ + for (size_t i = 0; i < vec->length; ++i) { + expr_free(&vec->data[i]); + } + expr_vec_destroy(vec); +} + +void expr_vec_push(ExprVec* vec, Expr expr) +{ + if (vec->length + 1 > vec->capacity) { + vec->capacity *= 2; + vec->data = realloc(vec->data, sizeof(Expr) * vec->capacity); + } + vec->data[vec->length] = expr; + vec->length += 1; +} + +Expr expr_vec_pop(ExprVec* vec) +{ + vec->length -= 1; + return vec->data[vec->length]; +} + +bool expr_vec_equal(const ExprVec* self, const ExprVec* other) +{ + if (self->length != other->length) { + return false; + } + for (size_t i = 0; i < self->length; ++i) { + if (!expr_equal(&self->data[i], &other->data[i])) { + return false; + } + } + return true; +} + +void expr_free(Expr* expr) +{ + switch (expr->type) { + case ExprType_Loop: + expr_vec_free(&expr->exprs); + 
break; + default: + break; + } +} + +const char* expr_bracket_color(int depth) +{ + switch (depth % 3) { + case 0: + return color_bright_yellow; + case 1: + return color_magenta; + case 2: + return color_cyan; + } + return NULL; +} + +void expr_stringify_concat_value(Expr* expr, char* acc, int depth) +{ + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "("); + strcat(acc, color_reset); + char value[16] = { 0 }; + snprintf(value, 16, "%d", expr->value); + strcat(acc, value); + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, ")"); + strcat(acc, color_reset); +} + +void expr_vec_stringify(ExprVec* vec, char* acc, int depth) +{ + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "["); + strcat(acc, color_reset); + for (size_t i = 0; i < vec->length; ++i) { + if (i != 0) { + strcat(acc, " "); + } + expr_stringify(&vec->data[i], acc, depth + 1); + } + strcat(acc, color_bold); + strcat(acc, expr_bracket_color(depth)); + strcat(acc, "]"); + strcat(acc, color_reset); +} + +void expr_stringify(Expr* expr, char* acc, int depth) +{ + switch (expr->type) { + case ExprType_Error: + strcat(acc, color_bright_red); + strcat(acc, "Error"); + strcat(acc, color_reset); + break; + case ExprType_Incr: + strcat(acc, color_yellow); + strcat(acc, "Incr"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Decr: + strcat(acc, color_yellow); + strcat(acc, "Decr"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Left: + strcat(acc, color_green); + strcat(acc, "Left"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Right: + strcat(acc, color_green); + strcat(acc, "Right"); + strcat(acc, color_reset); + expr_stringify_concat_value(expr, acc, depth); + break; + case ExprType_Output: + strcat(acc, color_bright_gray); + strcat(acc, "Output"); + 
strcat(acc, color_reset); + break; + case ExprType_Input: + strcat(acc, color_bright_gray); + strcat(acc, "Input"); + strcat(acc, color_reset); + break; + case ExprType_Loop: + strcat(acc, color_bright_red); + strcat(acc, "Loop"); + expr_vec_stringify(&expr->exprs, acc, depth); + strcat(acc, color_reset); + break; + case ExprType_Zero: + strcat(acc, color_yellow); + strcat(acc, "Zero"); + strcat(acc, color_reset); + break; + } +} + +bool expr_equal(const Expr* self, const Expr* other) +{ + if (self->type != other->type) { + return false; + } + switch (self->type) { + case ExprType_Incr: + case ExprType_Decr: + case ExprType_Left: + case ExprType_Right: + if (self->value != other->value) { + return false; + } + break; + case ExprType_Loop: + if (!expr_vec_equal(&self->exprs, &other->exprs)) { + return false; + } + break; + default: + break; + } + return true; +} diff --git a/expr.h b/expr.h new file mode 100644 index 0000000..8b6c2ad --- /dev/null +++ b/expr.h @@ -0,0 +1,49 @@ +#ifndef EXPR_H +#define EXPR_H + +#include +#include + +typedef enum { + ExprType_Error, + ExprType_Incr, + ExprType_Decr, + ExprType_Left, + ExprType_Right, + ExprType_Output, + ExprType_Input, + ExprType_Loop, + ExprType_Zero, +} ExprType; + +typedef struct Expr Expr; + +typedef struct ExprVec { + Expr* data; + size_t capacity; + size_t length; +} ExprVec; + +void expr_vec_construct(ExprVec* vec); +void expr_vec_destroy(ExprVec* vec); +void expr_vec_free(ExprVec* vec); +void expr_vec_push(ExprVec* vec, Expr expr); +Expr expr_vec_pop(ExprVec* vec); +bool expr_vec_equal(const ExprVec* self, const ExprVec* other); + +struct Expr { + ExprType type; + union { + int value; + ExprVec exprs; + }; +}; + +void expr_free(Expr* expr); +const char* expr_bracket_color(int depth); +void expr_stringify_concat_value(Expr* expr, char* acc, int depth); +void expr_vec_stringify(ExprVec* vec, char* acc, int depth); +void expr_stringify(Expr* expr, char* acc, int depth); +bool expr_equal(const Expr* self, const 
Expr* other); + +#endif diff --git a/main.c b/main.c index 51eb875..aac86a8 100644 --- a/main.c +++ b/main.c @@ -1,3 +1,8 @@ +#include "emitter.h" +#include "expr.h" +#include "optimizer.h" +#include "parser.h" +#include "print.h" #include #include #include @@ -5,766 +10,17 @@ #include #include -typedef enum { - Token_Eof, - Token_Plus, - Token_Minus, - Token_LT, - Token_GT, - Token_Dot, - Token_Comma, - Token_LBracket, - Token_RBracket -} Token; - -typedef struct { - const char* text; - size_t index; - size_t length; -} Lexer; - -Lexer lexer_create(const char* text, size_t length) +int main(int argc, char** argv) { - return (Lexer) { .text = text, .index = 0, .length = length }; -} - -void lexer_step(Lexer* lexer) { lexer->index += 1; } - -Token lexer_next(Lexer* lexer) -{ - if (lexer->index >= lexer->length) { - return Token_Eof; - } - switch (lexer->text[lexer->index]) { - case '+': - return (lexer_step(lexer), Token_Plus); - case '-': - return (lexer_step(lexer), Token_Minus); - case '<': - return (lexer_step(lexer), Token_LT); - case '>': - return (lexer_step(lexer), Token_GT); - case '.': - return (lexer_step(lexer), Token_Dot); - case ',': - return (lexer_step(lexer), Token_Comma); - case '[': - return (lexer_step(lexer), Token_LBracket); - case ']': - return (lexer_step(lexer), Token_RBracket); - default: - return (lexer_step(lexer), lexer_next(lexer)); - } -} - -typedef enum { - ExprType_Error, - ExprType_Incr, - ExprType_Decr, - ExprType_Left, - ExprType_Right, - ExprType_Output, - ExprType_Input, - ExprType_Loop, - ExprType_Zero, -} ExprType; - -typedef struct Expr Expr; - -typedef struct ExprVec { - Expr* data; - size_t capacity; - size_t length; -} ExprVec; - -struct Expr { - ExprType type; - union { - int value; - ExprVec exprs; - }; -}; - -void expr_vec_construct(ExprVec* vec) -{ - *vec = (ExprVec) { - .data = malloc(sizeof(Expr) * 8), - .capacity = 8, - .length = 0, - }; -} -void expr_vec_destroy(ExprVec* vec) { free(vec->data); } - -void 
expr_free(Expr* expr); -void expr_vec_free(ExprVec* vec) -{ - for (size_t i = 0; i < vec->length; ++i) { - expr_free(&vec->data[i]); - } - expr_vec_destroy(vec); -} - -void expr_vec_push(ExprVec* vec, Expr expr) -{ - if (vec->length + 1 > vec->capacity) { - vec->capacity *= 2; - vec->data = realloc(vec->data, sizeof(Expr) * vec->capacity); - } - vec->data[vec->length] = expr; - vec->length += 1; -} -Expr expr_vec_pop(ExprVec* vec) -{ - vec->length -= 1; - return vec->data[vec->length]; -} - -void expr_free(Expr* expr) -{ - switch (expr->type) { - case ExprType_Loop: - expr_vec_free(&expr->exprs); - break; - default: - break; - } -} - -const char* color_reset = "\x1b[0m"; -const char* color_bold = "\x1b[1m"; - -const char* color_black = "\x1b[30m"; -const char* color_red = "\x1b[31m"; -const char* color_green = "\x1b[32m"; -const char* color_yellow = "\x1b[33m"; -const char* color_blue = "\x1b[34m"; -const char* color_magenta = "\x1b[35m"; -const char* color_cyan = "\x1b[36m"; -const char* color_bright_gray = "\x1b[37m"; - -const char* color_gray = "\x1b[90m"; -const char* color_bright_red = "\x1b[91m"; -const char* color_bright_green = "\x1b[92m"; -const char* color_bright_yellow = "\x1b[93m"; -const char* color_bright_blue = "\x1b[94m"; -const char* color_bright_magenta = "\x1b[95m"; -const char* color_bright_cyan = "\x1b[96m"; -const char* color_white = "\x1b[97m"; - -const char* expr_bracket_color(int depth) -{ - switch (depth % 3) { - case 0: - return color_bright_yellow; - case 1: - return color_magenta; - case 2: - return color_cyan; - } - return NULL; -} - -void expr_stringify_concat_value(Expr* expr, char* acc, int depth) -{ - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "("); - strcat(acc, color_reset); - char value[16] = { 0 }; - snprintf(value, 16, "%d", expr->value); - strcat(acc, value); - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, ")"); - strcat(acc, color_reset); -} - -void 
expr_stringify(Expr* expr, char* acc, int depth); - -void expr_vec_stringify(ExprVec* vec, char* acc, int depth) -{ - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "["); - strcat(acc, color_reset); - for (size_t i = 0; i < vec->length; ++i) { - if (i != 0) { - strcat(acc, " "); - } - expr_stringify(&vec->data[i], acc, depth + 1); - } - strcat(acc, color_bold); - strcat(acc, expr_bracket_color(depth)); - strcat(acc, "]"); - strcat(acc, color_reset); -} - -void expr_stringify(Expr* expr, char* acc, int depth) -{ - switch (expr->type) { - case ExprType_Error: - strcat(acc, color_bright_red); - strcat(acc, "Error"); - strcat(acc, color_reset); - break; - case ExprType_Incr: - strcat(acc, color_yellow); - strcat(acc, "Incr"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Decr: - strcat(acc, color_yellow); - strcat(acc, "Decr"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Left: - strcat(acc, color_green); - strcat(acc, "Left"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Right: - strcat(acc, color_green); - strcat(acc, "Right"); - strcat(acc, color_reset); - expr_stringify_concat_value(expr, acc, depth); - break; - case ExprType_Output: - strcat(acc, color_bright_gray); - strcat(acc, "Output"); - strcat(acc, color_reset); - break; - case ExprType_Input: - strcat(acc, color_bright_gray); - strcat(acc, "Input"); - strcat(acc, color_reset); - break; - case ExprType_Loop: - strcat(acc, color_bright_red); - strcat(acc, "Loop"); - expr_vec_stringify(&expr->exprs, acc, depth); - strcat(acc, color_reset); - break; - case ExprType_Zero: - strcat(acc, color_yellow); - strcat(acc, "Zero"); - strcat(acc, color_reset); - break; - } -} - -bool expr_equal(const Expr* self, const Expr* other); - -bool expr_vec_equal(const ExprVec* self, const ExprVec* other) -{ - if (self->length != 
other->length) { - return false; - } - for (size_t i = 0; i < self->length; ++i) { - if (!expr_equal(&self->data[i], &other->data[i])) { - return false; - } - } - return true; -} - -bool expr_equal(const Expr* self, const Expr* other) -{ - if (self->type != other->type) { - return false; - } - switch (self->type) { - case ExprType_Incr: - case ExprType_Decr: - case ExprType_Left: - case ExprType_Right: - if (self->value != other->value) { - return false; - } - break; - case ExprType_Loop: - if (!expr_vec_equal(&self->exprs, &other->exprs)) { - return false; - } - break; - default: - break; - } - return true; -} - -typedef struct { - Lexer lexer; - Token current; -} Parser; - -Parser parser_create(const char* text, size_t length) -{ - Lexer lexer = lexer_create(text, length); - return (Parser) { - .lexer = lexer, - .current = lexer_next(&lexer), - }; -} - -void parser_step(Parser* parser) -{ - parser->current = lexer_next(&parser->lexer); -} - -Expr parser_parse_expr(Parser* parser); - -Expr parser_parse_loop(Parser* parser) -{ - parser_step(parser); - ExprVec exprs; - expr_vec_construct(&exprs); - while (parser->current != Token_Eof && parser->current != Token_RBracket) { - expr_vec_push(&exprs, parser_parse_expr(parser)); - } - if (parser->current != Token_RBracket) { - return (Expr) { .type = ExprType_Error }; - } - parser_step(parser); - return (Expr) { .type = ExprType_Loop, .exprs = exprs }; -} - -Expr parser_parse_expr(Parser* parser) -{ - switch (parser->current) { - case Token_Plus: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Incr, .value = 1 } - ); - case Token_Minus: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Decr, .value = 1 } - ); - case Token_LT: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Left, .value = 1 } - ); - case Token_GT: - return ( - parser_step(parser), - (Expr) { .type = ExprType_Right, .value = 1 } - ); - case Token_Dot: - return (parser_step(parser), (Expr) { .type = ExprType_Output 
}); - case Token_Comma: - return (parser_step(parser), (Expr) { .type = ExprType_Input }); - case Token_LBracket: - return parser_parse_loop(parser); - default: - return (parser_step(parser), (Expr) { .type = ExprType_Error }); - } -} - -ExprVec parser_parse(Parser* parser) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - while (parser->current != Token_Eof) { - expr_vec_push(&exprs, parser_parse_expr(parser)); - } - return exprs; -} - -Expr expr_optimize_fold_adjecent(const Expr* expr); -ExprVec expr_vec_optimize_fold_adjecent(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - if (vec->length == 0) { - return exprs; - } - Expr a = expr_optimize_fold_adjecent(&vec->data[0]); - for (size_t i = 1; i < vec->length; ++i) { - Expr b = expr_optimize_fold_adjecent(&vec->data[i]); - switch (a.type) { - case ExprType_Incr: - case ExprType_Decr: - case ExprType_Left: - case ExprType_Right: - if (a.type != b.type) { - expr_vec_push(&exprs, a); - a = b; - } else { - a.value += b.value; - } - break; - default: - expr_vec_push(&exprs, a); - a = b; - } - } - expr_vec_push(&exprs, a); - return exprs; -} -Expr expr_optimize_fold_adjecent(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_fold_adjecent(&expr->exprs), - }; - } else { - return *expr; - } -} - -Expr expr_optimize_eliminate_negation(const Expr* expr); -ExprVec expr_vec_optimize_eliminate_negation(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - if (vec->length == 0) { - return exprs; - } - expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[0])); - for (size_t i = 1; i < vec->length; ++i) { - expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[i])); - Expr* a = &exprs.data[i - 1]; - Expr* b = &exprs.data[i]; - if (a->type == ExprType_Incr && b->type == ExprType_Decr) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < 
b->value) { - *a = (Expr) { .type = ExprType_Decr, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } else if (a->type == ExprType_Decr && b->type == ExprType_Incr) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Incr, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } - if (a->type == ExprType_Left && b->type == ExprType_Right) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Right, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } else if (a->type == ExprType_Right && b->type == ExprType_Left) { - if (a->value > b->value) { - a->value -= b->value; - expr_vec_pop(&exprs); - } else if (a->value < b->value) { - *a = (Expr) { .type = ExprType_Left, - .value = b->value - a->value }; - expr_vec_pop(&exprs); - } else { - expr_vec_pop(&exprs); - expr_vec_pop(&exprs); - } - } - } - return exprs; -} -Expr expr_optimize_eliminate_negation(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_eliminate_negation(&expr->exprs), - }; - } else { - return *expr; - } -} - -Expr expr_optimize_eliminate_overflow(const Expr* expr); -ExprVec expr_vec_optimize_eliminate_overflow(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - for (size_t i = 0; i < vec->length; ++i) { - expr_vec_push(&exprs, expr_optimize_eliminate_overflow(&vec->data[i])); - } - return exprs; -} -Expr expr_optimize_eliminate_overflow(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_eliminate_overflow(&expr->exprs), - }; - } 
else if (expr->value > 255) { - return (Expr) { .type = expr->type, .value = expr->value % 256 }; - } else { - return *expr; - } -} - -Expr expr_optimize_replace_zeroing_loops(const Expr* expr); -ExprVec expr_vec_optimize_replace_zeroing_loops(const ExprVec* vec) -{ - ExprVec exprs; - expr_vec_construct(&exprs); - for (size_t i = 0; i < vec->length; ++i) { - expr_vec_push( - &exprs, expr_optimize_replace_zeroing_loops(&vec->data[i]) - ); - } - return exprs; -} -Expr expr_optimize_replace_zeroing_loops(const Expr* expr) -{ - if (expr->type == ExprType_Loop) { - if (expr->exprs.length == 1 - && (expr->exprs.data[0].type == ExprType_Incr - || expr->exprs.data[0].type == ExprType_Decr) - && expr->exprs.data[0].value % 2 != 0) { - return (Expr) { .type = ExprType_Zero }; - } else { - return (Expr) { - .type = ExprType_Loop, - .exprs = expr_vec_optimize_replace_zeroing_loops(&expr->exprs), - }; - } - } else { - return *expr; - } -} - -typedef struct { - uint8_t* code; - size_t pos; - int loop_counter; - bool cmp_flags_set; -} Emitter; - -Emitter emitter_create(uint8_t* code_address) -{ - return (Emitter) { - .code = code_address, - .pos = 0, - .loop_counter = 0, - .cmp_flags_set = false, - }; -} - -void emitter_push_u8(Emitter* emitter, uint8_t value) -{ - emitter->code[emitter->pos] = value; - emitter->pos += 1; -} - -void emitter_push_u32(Emitter* emitter, uint32_t value) -{ - emitter->code[emitter->pos] = value & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 8) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 16) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = value >> 24; - emitter->pos += 1; -} - -void emitter_push_u64(Emitter* emitter, uint64_t value) -{ - emitter->code[emitter->pos] = value & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 8) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 16) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 
24) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 32) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 40) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 48) & 0xFF; - emitter->pos += 1; - emitter->code[emitter->pos] = (value >> 56) & 0xFF; - emitter->pos += 1; -} - -uint8_t get_char(void) { return (uint8_t)fgetc(stdin); } -void put_char(uint8_t v) { fputc(v, stdout); } - -void emitter_emit_expr(Emitter* emitter, Expr* expr) -{ - emitter->cmp_flags_set = false; - switch (expr->type) { - case ExprType_Error: - fprintf(stderr, "panic: emitter: program contained errors\n"); - exit(1); - break; - case ExprType_Incr: - // add BYTE [rbx], 1 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x03); - emitter_push_u8(emitter, expr->value); - emitter->cmp_flags_set = true; - break; - case ExprType_Decr: - // sub BYTE [rbx], 1 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x2b); - emitter_push_u8(emitter, expr->value); - emitter->cmp_flags_set = true; - break; - case ExprType_Left: - // sub rbx, 1 - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x83); - emitter_push_u8(emitter, 0xeb); - emitter_push_u8(emitter, expr->value); - break; - case ExprType_Right: - // add rbx, 1 - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x83); - emitter_push_u8(emitter, 0xc3); - emitter_push_u8(emitter, expr->value); - break; - case ExprType_Output: - // movzx edi, BYTE [rbx] - emitter_push_u8(emitter, 0x0f); - emitter_push_u8(emitter, 0xb6); - emitter_push_u8(emitter, 0x3b); - // movabs rax, - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0xb8); - emitter_push_u64(emitter, (uint64_t)put_char); - // call rax - emitter_push_u8(emitter, 0xff); - emitter_push_u8(emitter, 0xd0); - break; - case ExprType_Input: - // movabs rax, - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0xb8); - emitter_push_u64(emitter, (uint64_t)get_char); - // call rax - 
emitter_push_u8(emitter, 0xff); - emitter_push_u8(emitter, 0xd0); - // mov BYTE [rbx], al - emitter_push_u8(emitter, 0x88); - emitter_push_u8(emitter, 0x03); - break; - case ExprType_Loop: - fprintf(stderr, "panic: emitter: unexpected loop\n"); - exit(1); - break; - case ExprType_Zero: - // mov BYTE [rbx], 0 - emitter_push_u8(emitter, 0xc6); - emitter_push_u8(emitter, 0x03); - emitter_push_u8(emitter, 0x00); - emitter->cmp_flags_set = true; - break; - } -} - -void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec); - -void emitter_emit_loop(Emitter* emitter, Expr* expr) -{ - int64_t start_loc = (int64_t)&emitter->code[emitter->pos]; - emitter_emit_expr_vec(emitter, &expr->exprs); - if (!emitter->cmp_flags_set) { - // cmp BYTE [rbx], 0 - emitter_push_u8(emitter, 0x80); - emitter_push_u8(emitter, 0x3b); - emitter_push_u8(emitter, 0x00); - } - - int64_t current_loc = (int64_t)&emitter->code[emitter->pos]; - int32_t relative_address = -(int32_t)(current_loc - start_loc); - if (relative_address >= -127) { - // jne - emitter_push_u8(emitter, 0x75); - emitter_push_u8(emitter, (uint8_t)relative_address - 2); - } else { - // jne - emitter_push_u8(emitter, 0x0f); - emitter_push_u8(emitter, 0x85); - emitter_push_u32(emitter, (uint32_t)relative_address - 6); - } -} - -void emitter_emit_expr_vec(Emitter* emitter, ExprVec* vec) -{ - for (size_t i = 0; i < vec->length; ++i) { - Expr* expr = &vec->data[i]; - if (expr->type == ExprType_Loop) { - emitter_emit_loop(emitter, expr); - } else { - emitter_emit_expr(emitter, expr); - } - } -} - -void emitter_emit_program(Emitter* emitter, ExprVec* program) -{ - // push rbp: - emitter_push_u8(emitter, 0x55); - // mov rbp, rsp - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x89); - emitter_push_u8(emitter, 0xe5); - // push rbx: - emitter_push_u8(emitter, 0x53); - // mov rbx, rdi - emitter_push_u8(emitter, 0x48); - emitter_push_u8(emitter, 0x89); - emitter_push_u8(emitter, 0xfb); - - emitter_emit_expr_vec(emitter, program); 
- - // pop rbx - emitter_push_u8(emitter, 0x5b); - // pop rbx - emitter_push_u8(emitter, 0x5d); - // ret - emitter_push_u8(emitter, 0xc3); -} - -int main(void) -{ - // const char* text - // = "+++[>[-]++++++++++++++++++++++++++++++++++++++++++++++++++" - // "++++++++++++++++++" - // "++++.---.++++++++++++++++++++.[-]++++++++++.<-]"; - const char* text = ">++[<+++++++++++++>-]<[[>+>+<<-]>[<+>-]++++++++" - "[>++++++++<-]>.[-]<<>++++++++++[>++++++++++[>++" - "++++++++[>++++++++++[>++++++++++[>++++++++++[>+" - "+++++++++[-]<-]<-]<-]<-]<-]<-]<-]++++++++++."; - + const char* text = "++++++++++[>+<-]"; printf("\ntext:%s\n\"%s\"%s\n", color_bright_green, text, color_reset); + Parser parser = parser_create(lexer_from_string(text, strlen(text))); + + // Parser parser = parser_create(lexer_from_args_or_stdin(argc, argv)); char* ast_string = malloc(sizeof(char) * 33768); ast_string[0] = '\0'; - Parser parser = parser_create(text, strlen(text)); ExprVec ast = parser_parse(&parser); { expr_vec_stringify(&ast, ast_string, 0); @@ -787,7 +43,7 @@ int main(void) expr_vec_free(&previous_ast); } previous_ast = ast; - ast = expr_vec_optimize_fold_adjecent(&ast); + ast = optimize_fold_adjecent(&ast); printf("%sfold_adjecent:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -797,7 +53,7 @@ int main(void) expr_vec_free(&previous_ast); previous_ast = ast; - ast = expr_vec_optimize_eliminate_negation(&ast); + ast = optimize_eliminate_negation(&ast); printf("%seliminate_negation:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -807,7 +63,7 @@ int main(void) expr_vec_free(&previous_ast); previous_ast = ast; - ast = expr_vec_optimize_eliminate_overflow(&ast); + ast = optimize_eliminate_overflow(&ast); printf("%seliminate_overflow:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -817,7 +73,7 @@ int main(void) expr_vec_free(&previous_ast); 
previous_ast = ast; - ast = expr_vec_optimize_replace_zeroing_loops(&ast); + ast = optimize_replace_zeroing_loops(&ast); printf("%sreplace_zeroing_loops:%s\n", color_bold, color_reset); if (!expr_vec_equal(&ast, &previous_ast)) { ast_string[0] = '\0'; @@ -835,7 +91,7 @@ int main(void) printf("\n%sfinal:%s\n%s\n", color_bold, color_reset, ast_string); size_t code_size = 33678; - uint8_t* code = mmap( + void* code = mmap( NULL, code_size, PROT_READ | PROT_WRITE | PROT_EXEC, @@ -854,13 +110,11 @@ int main(void) uint8_t* memory = malloc(30000); memset(memory, 0, 30000); - void (*runnable)(uint8_t* memory) = (void (*)(uint8_t* memory))(void*)code; - printf("\n%scode:%s\n", color_bold, color_reset); for (size_t y = 0; y < 8; ++y) { for (size_t x = 0; x < 8; ++x) { - uint8_t v = code[y * 8 + x]; + uint8_t v = ((uint8_t*)code)[y * 8 + x]; if (v == 0) { fputs(color_gray, stdout); } @@ -872,6 +126,11 @@ int main(void) printf("\n%sresult:%s\n", color_bold, color_reset); +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wpedantic" + void (*runnable)(uint8_t* memory) = (void (*)(uint8_t* memory))code; +#pragma GCC diagnostic pop + runnable(memory); printf("\n%smemory:%s\n", color_bold, color_reset);
diff --git a/optimizer.c b/optimizer.c
new file mode 100644
index 0000000..d8e9804
--- /dev/null
+++ b/optimizer.c
@@ -0,0 +1,215 @@
+#include "optimizer.h"
+#include "expr.h"
+
+/*
+ * fold adjacent
+ *
+ * A(N) :: { Incr(N) | Decr(N) | Right(N) | Left(N) }
+ *
+ * [A(N1) A(N2)] -> [A(N1 + N2)]
+ *
+ * Returns a newly constructed vector; `vec` is left untouched. Loop bodies
+ * are folded recursively through expr_optimize_fold_adjecent().
+ */
+
+ExprVec optimize_fold_adjecent(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    if (vec->length == 0) {
+        return exprs;
+    }
+    /* `a` is the pending run; it is only pushed once the run has ended. */
+    Expr a = expr_optimize_fold_adjecent(&vec->data[0]);
+    for (size_t i = 1; i < vec->length; ++i) {
+        Expr b = expr_optimize_fold_adjecent(&vec->data[i]);
+        switch (a.type) {
+        case ExprType_Incr:
+        case ExprType_Decr:
+        case ExprType_Left:
+        case ExprType_Right:
+            if (a.type != b.type) {
+                expr_vec_push(&exprs, a);
+                a = b;
+            } else {
+                a.value += b.value;
+            }
+            break;
+        default:
+            expr_vec_push(&exprs, a);
+            a = b;
+        }
+    }
+    expr_vec_push(&exprs, a);
+    return exprs;
+}
+
+/* Folds the body of a loop; any other expression is returned as a copy. */
+Expr expr_optimize_fold_adjecent(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = optimize_fold_adjecent(&expr->exprs),
+        };
+    } else {
+        return *expr;
+    }
+}
+
+/*
+ * eliminate negation
+ *
+ * A(N), B(N) :: { Incr(N) | Decr(N) | Right(N) | Left(N) }
+ *
+ * [A(N) B(N)] = []
+ *
+ * [A(N1) B(N2)] ? N1 == N2 -> []
+ * [A(N1) B(N2)] ? N1 < N2 -> [B(N2 - N1)]
+ * [A(N1) B(N2)] ? N1 > N2 -> [A(N1 - N2)]
+ *
+ * A and B must be opposites: Incr/Decr or Left/Right.
+ */
+
+/* Nonzero when `b` is the opposite operation of `a`. */
+static int exprs_are_opposites(const Expr* a, const Expr* b)
+{
+    switch (a->type) {
+    case ExprType_Incr:
+        return b->type == ExprType_Decr;
+    case ExprType_Decr:
+        return b->type == ExprType_Incr;
+    case ExprType_Left:
+        return b->type == ExprType_Right;
+    case ExprType_Right:
+        return b->type == ExprType_Left;
+    default:
+        return 0;
+    }
+}
+
+ExprVec optimize_eliminate_negation(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(&exprs, expr_optimize_eliminate_negation(&vec->data[i]));
+        if (exprs.length < 2) {
+            continue;
+        }
+        /*
+         * Look at the two newest output elements. Indexing the output with
+         * the input index `i` goes stale as soon as something was popped.
+         */
+        Expr* a = &exprs.data[exprs.length - 2];
+        Expr* b = &exprs.data[exprs.length - 1];
+        if (!exprs_are_opposites(a, b)) {
+            continue;
+        }
+        if (a->value > b->value) {
+            a->value -= b->value;
+            expr_vec_pop(&exprs);
+        } else if (a->value < b->value) {
+            *a = (Expr) { .type = b->type, .value = b->value - a->value };
+            expr_vec_pop(&exprs);
+        } else {
+            expr_vec_pop(&exprs);
+            expr_vec_pop(&exprs);
+        }
+    }
+    return exprs;
+}
+
+/* Recurses into loop bodies; other expressions are returned as a copy. */
+Expr expr_optimize_eliminate_negation(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = optimize_eliminate_negation(&expr->exprs),
+        };
+    } else {
+        return *expr;
+    }
+}
+
+/*
+ * eliminate overflow
+ *
+ * A(N) :: { Incr(N) | Decr(N) }
+ *
+ * N > 255
+ *
+ * A(N) -> A(N % 256)
+ *
+ * Cells are uint8_t (see main.c), so only Incr/Decr may be reduced. Left and
+ * Right move across the 30000-byte tape and must keep their full distance.
+ */
+
+ExprVec optimize_eliminate_overflow(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(&exprs, expr_optimize_eliminate_overflow(&vec->data[i]));
+    }
+    return exprs;
+}
+
+Expr expr_optimize_eliminate_overflow(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        return (Expr) {
+            .type = ExprType_Loop,
+            .exprs = optimize_eliminate_overflow(&expr->exprs),
+        };
+    } else if ((expr->type == ExprType_Incr || expr->type == ExprType_Decr)
+        && expr->value > 255) {
+        return (Expr) { .type = expr->type, .value = expr->value % 256 };
+    } else {
+        return *expr;
+    }
+}
+
+/*
+ * replace zeroing loops
+ *
+ * A(N) :: { Incr(N) | Decr(N) }
+ *
+ * N % 2 == 1
+ *
+ * Loop[A(N)] -> Zero
+ *
+ * With 8-bit wrapping cells an odd step reaches zero from every start value.
+ */
+
+ExprVec optimize_replace_zeroing_loops(const ExprVec* vec)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    for (size_t i = 0; i < vec->length; ++i) {
+        expr_vec_push(
+            &exprs, expr_optimize_replace_zeroing_loops(&vec->data[i])
+        );
+    }
+    return exprs;
+}
+
+Expr expr_optimize_replace_zeroing_loops(const Expr* expr)
+{
+    if (expr->type == ExprType_Loop) {
+        if (expr->exprs.length == 1
+            && (expr->exprs.data[0].type == ExprType_Incr
+                || expr->exprs.data[0].type == ExprType_Decr)
+            && expr->exprs.data[0].value % 2 != 0) {
+            return (Expr) { .type = ExprType_Zero };
+        } else {
+            return (Expr) {
+                .type = ExprType_Loop,
+                .exprs = optimize_replace_zeroing_loops(&expr->exprs),
+            };
+        }
+    } else {
+        return *expr;
+    }
+}
diff --git a/optimizer.h b/optimizer.h
new file mode 100644
index 0000000..6ed7c65
--- /dev/null
+++ b/optimizer.h
@@ -0,0 +1,18 @@
+#ifndef OPTIMIZER_H
+#define OPTIMIZER_H
+
+#include "expr.h"
+
+ExprVec optimize_fold_adjecent(const ExprVec* vec);
+Expr expr_optimize_fold_adjecent(const Expr* expr);
+
+ExprVec optimize_eliminate_negation(const ExprVec* vec);
+Expr expr_optimize_eliminate_negation(const Expr* expr);
+
+ExprVec optimize_eliminate_overflow(const ExprVec* vec);
+Expr expr_optimize_eliminate_overflow(const Expr* expr);
+
+ExprVec optimize_replace_zeroing_loops(const ExprVec* vec);
+Expr expr_optimize_replace_zeroing_loops(const Expr* expr);
+
+#endif
diff --git a/parser.c b/parser.c
new file mode 100644
index 0000000..ea31e87
--- /dev/null
+++ b/parser.c
@@ -0,0 +1,173 @@
+#include "parser.h"
+#include <stdlib.h>
+
+/* Lexer over `length` bytes of `text`; an empty input starts out done. */
+Lexer lexer_from_string(const char* text, size_t length)
+{
+    Lexer lexer = (Lexer) {
+        .type = LexerType_String,
+        .current = length > 0 ? text[0] : '\0',
+        .string = {
+            .text = text,
+            .index = 0,
+            .length = length,
+        },
+    };
+    return lexer;
+}
+
+/* Lexer over `file`, primed with the file's first character. */
+Lexer lexer_from_file(FILE* file)
+{
+    Lexer lexer = (Lexer) {
+        .type = LexerType_File,
+        .current = 1, /* non-zero so the priming lexer_step() is not skipped */
+        .file = file,
+    };
+    lexer_step(&lexer);
+    return lexer;
+}
+
+/* Lexes argv[1] if present, else stdin; exits when it cannot be opened. */
+Lexer lexer_from_args_or_stdin(int argc, char** argv)
+{
+    if (argc >= 2) {
+        FILE* file = fopen(argv[1], "r");
+        if (!file) {
+            fprintf(stderr, "panic: could not open file \"%s\"\n", argv[1]);
+            exit(1);
+        }
+        return lexer_from_file(file);
+    } else {
+        return lexer_from_file(stdin);
+    }
+}
+
+/* String lexers end at `length`; file lexers end once `current` is '\0'. */
+bool lexer_done(Lexer* lexer)
+{
+    switch (lexer->type) {
+    case LexerType_String:
+        return lexer->string.index >= lexer->string.length;
+    case LexerType_File:
+        return lexer->current == '\0';
+    }
+    exit(1);
+}
+
+void lexer_step(Lexer* lexer)
+{
+    if (lexer_done(lexer)) {
+        return;
+    }
+    switch (lexer->type) {
+    case LexerType_String:
+        lexer->string.index += 1;
+        lexer->current = lexer->string.text[lexer->string.index];
+        break;
+    case LexerType_File: {
+        int c = fgetc(lexer->file);
+        lexer->current = c != EOF ? (char)c : '\0';
+    } break;
+    }
+}
+
+Token lexer_next(Lexer* lexer)
+{
+    for (; !lexer_done(lexer); lexer_step(lexer)) {
+        switch (lexer->current) {
+        case '+':
+            return (lexer_step(lexer), Token_Plus);
+        case '-':
+            return (lexer_step(lexer), Token_Minus);
+        case '<':
+            return (lexer_step(lexer), Token_LT);
+        case '>':
+            return (lexer_step(lexer), Token_GT);
+        case '.':
+            return (lexer_step(lexer), Token_Dot);
+        case ',':
+            return (lexer_step(lexer), Token_Comma);
+        case '[':
+            return (lexer_step(lexer), Token_LBracket);
+        case ']':
+            return (lexer_step(lexer), Token_RBracket);
+        default:
+            break; /* not a command: skip it by looping, not by recursing */
+        }
+    }
+    return Token_Eof;
+}
+
+Parser parser_create(Lexer lexer)
+{
+    /* Tokenize from the stored lexer so its position matches `current`. */
+    Parser parser = { .lexer = lexer };
+    parser.current = lexer_next(&parser.lexer);
+    return parser;
+}
+
+void parser_step(Parser* parser)
+{
+    parser->current = lexer_next(&parser->lexer);
+}
+Expr parser_parse_expr(Parser* parser);
+
+Expr parser_parse_loop(Parser* parser)
+{
+    parser_step(parser);
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current != Token_Eof && parser->current != Token_RBracket) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    if (parser->current != Token_RBracket) {
+        return (Expr) { .type = ExprType_Error };
+    }
+    parser_step(parser);
+    return (Expr) { .type = ExprType_Loop, .exprs = exprs };
+}
+
+Expr parser_parse_expr(Parser* parser)
+{
+    switch (parser->current) {
+    case Token_Plus:
+        return (
+            parser_step(parser),
+            (Expr) { .type = ExprType_Incr, .value = 1 }
+        );
+    case Token_Minus:
+        return (
+            parser_step(parser),
+            (Expr) { .type = ExprType_Decr, .value = 1 }
+        );
+    case Token_LT:
+        return (
+            parser_step(parser),
+            (Expr) { .type = ExprType_Left, .value = 1 }
+        );
+    case Token_GT:
+        return (
+            parser_step(parser),
+            (Expr) { .type = ExprType_Right, .value = 1 }
+        );
+    case Token_Dot:
+        return (parser_step(parser), (Expr) { .type = ExprType_Output });
+    case Token_Comma:
+        return (parser_step(parser), (Expr) { .type = ExprType_Input });
+    case Token_LBracket:
+        return parser_parse_loop(parser);
+    default:
+        return (parser_step(parser), (Expr) { .type = ExprType_Error });
+    }
+}
+
+ExprVec parser_parse(Parser* parser)
+{
+    ExprVec exprs;
+    expr_vec_construct(&exprs);
+    while (parser->current != Token_Eof) {
+        expr_vec_push(&exprs, parser_parse_expr(parser));
+    }
+    return exprs;
+}
diff --git a/parser.h b/parser.h
new file mode 100644
index 0000000..ff820b2
--- /dev/null
+++ b/parser.h
@@ -0,0 +1,62 @@
+#ifndef PARSER_H
+#define PARSER_H
+
+#include "expr.h"
+#include <stdio.h>
+
+typedef enum {
+    Token_Eof,
+    Token_Plus,
+    Token_Minus,
+    Token_LT,
+    Token_GT,
+    Token_Dot,
+    Token_Comma,
+    Token_LBracket,
+    Token_RBracket
+} Token;
+
+typedef enum {
+    LexerType_String,
+    LexerType_File,
+} LexerType;
+
+typedef struct {
+    LexerType type;
+    char current;
+    union {
+        struct {
+            const char* text;
+            size_t index;
+            size_t length;
+        } string;
+        FILE* file;
+    };
+} Lexer;
+
+Lexer lexer_from_string(const char* text, size_t length);
+Lexer lexer_from_file(FILE* file);
+Lexer lexer_from_args_or_stdin(int argc, char** argv);
+bool lexer_done(Lexer* lexer);
+/* Advances one character; a no-op once done. File EOF is stored as '\0'. */
+void lexer_step(Lexer* lexer);
+/* Next command token, skipping all other characters; Token_Eof at the end. */
+Token lexer_next(Lexer* lexer);
+
+typedef struct {
+    Lexer lexer;
+    Token current;
+} Parser;
+
+/* Takes `lexer` by value and loads its first token into `current`. */
+Parser parser_create(Lexer lexer);
+/* Replaces `current` with the next token from the parser's lexer. */
+void parser_step(Parser* parser);
+/* Expects `current` on `[`; ExprType_Error if the matching `]` is missing. */
+Expr parser_parse_loop(Parser* parser);
+/* Parses one expression; an unexpected token is consumed as ExprType_Error. */
+Expr parser_parse_expr(Parser* parser);
+/* Parses expressions until Token_Eof and returns them in order. */
+ExprVec parser_parse(Parser* parser);
+
+#endif
diff --git a/print.h b/print.h
new file mode 100644
index 0000000..46151a2
--- /dev/null
+++ b/print.h
@@ -0,0 +1,27 @@
+#ifndef COLOR_H
+#define COLOR_H
+
+#define MAYBE_UNUSED __attribute__((unused))
+
+MAYBE_UNUSED static const char* color_reset = "\x1b[0m";
+MAYBE_UNUSED static const char* color_bold = "\x1b[1m";
+
+MAYBE_UNUSED static const char* color_black = "\x1b[30m";
+MAYBE_UNUSED static const char* color_red = "\x1b[31m";
+MAYBE_UNUSED static const char* color_green = "\x1b[32m";
+MAYBE_UNUSED static const char* color_yellow = "\x1b[33m";
+MAYBE_UNUSED static const char* color_blue = "\x1b[34m";
+MAYBE_UNUSED static const char* color_magenta = "\x1b[35m";
+MAYBE_UNUSED static const char* color_cyan = "\x1b[36m";
+MAYBE_UNUSED static const char* color_bright_gray = "\x1b[37m";
+
+MAYBE_UNUSED static const char* color_gray = "\x1b[90m";
+MAYBE_UNUSED static const char* color_bright_red = "\x1b[91m";
+MAYBE_UNUSED static const char* color_bright_green = "\x1b[92m";
+MAYBE_UNUSED static const char* color_bright_yellow = "\x1b[93m";
+MAYBE_UNUSED static const char* color_bright_blue = "\x1b[94m";
+MAYBE_UNUSED static const char* color_bright_magenta = "\x1b[95m";
+MAYBE_UNUSED static const char* color_bright_cyan = "\x1b[96m";
+MAYBE_UNUSED static const char* color_white = "\x1b[97m";
+
+#endif
diff --git a/runtime.c b/runtime.c
new file mode 100644
index 0000000..a04ce90
--- /dev/null
+++ b/runtime.c
@@ -0,0 +1,7 @@
+#include "runtime.h"
+#include <stdint.h>
+#include <stdio.h>
+
+uint8_t get_char(void) { return (uint8_t)fgetc(stdin); }
+
+void put_char(uint8_t v) { fputc(v, stdout); }
diff --git a/runtime.h b/runtime.h
new file mode 100644
index 0000000..5ad0369
--- /dev/null
+++ b/runtime.h
@@ -0,0 +1,11 @@
+#ifndef RUNTIME_H
+#define RUNTIME_H
+
+#include <stdint.h>
+
+/* Reads one byte from stdin; EOF is truncated to uint8_t by the cast. */
+uint8_t get_char(void);
+/* Writes `v` to stdout. */
+void put_char(uint8_t v);
+
+#endif