// parser.c #include "parse.h" #include #include #include #include #include typedef enum { TOKEN_LPAREN, TOKEN_RPAREN, TOKEN_IDENT, TOKEN_INT, TOKEN_EOF, } TokenType; typedef struct { TokenType type; char* text; } Token; typedef struct { const char* input; size_t pos; } Lexer; typedef struct { Lexer lexer; Token current; } Parser; /* ========================= Lexer ========================= */ static char current_char(Lexer* lexer) { return lexer->input[lexer->pos]; } static void advance(Lexer* lexer) { if (current_char(lexer) != '\0') { lexer->pos++; } } static void skip_whitespace(Lexer* lexer) { while (isspace(current_char(lexer))) { advance(lexer); } } static Token make_ident(Lexer* lexer) { size_t start = lexer->pos; while (isalnum(current_char(lexer)) || current_char(lexer) == '_') { advance(lexer); } return (Token) { .type = TOKEN_IDENT, .text = strndup(lexer->input + start, lexer->pos - start), }; } static Token make_int(Lexer* lexer) { size_t start = lexer->pos; while (isdigit(current_char(lexer))) { advance(lexer); } return (Token) { .type = TOKEN_INT, .text = strndup(lexer->input + start, lexer->pos - start), }; } static Token next_token(Lexer* lexer) { skip_whitespace(lexer); char c = current_char(lexer); switch (c) { case '\0': return (Token) { .type = TOKEN_EOF, .text = NULL, }; case '(': advance(lexer); return (Token) { .type = TOKEN_LPAREN, .text = NULL, }; case ')': advance(lexer); return (Token) { .type = TOKEN_RPAREN, .text = NULL, }; default: break; } if (isalpha(c) || c == '_') { return make_ident(lexer); } if (isdigit(c)) { return make_int(lexer); } fprintf(stderr, "Unexpected character: '%c'\n", c); exit(EXIT_FAILURE); } /* ========================= Parser ========================= */ static void parser_advance(Parser* parser) { free(parser->current.text); parser->current = next_token(&parser->lexer); } static void parser_expect(Parser* parser, TokenType expected) { if (parser->current.type != expected) { fprintf(stderr, "Unexpected token\n"); exit(EXIT_FAILURE); } } static Expr* make_atom_expr(ExprType type, char* text) { Expr* expr = malloc(sizeof(Expr)); expr->type = type; expr->text = text; return expr; } static Expr* make_sexpr(void) { Expr* expr = malloc(sizeof(Expr)); expr->type = EXPR_SEXPR; expr->sexpr.items = NULL; expr->sexpr.count = 0; return expr; } static void sexpr_push(Expr* sexpr, Expr* item) { sexpr->sexpr.items = realloc(sexpr->sexpr.items, sizeof(Expr*) * (sexpr->sexpr.count + 1)); sexpr->sexpr.items[sexpr->sexpr.count++] = item; } static Expr* parse_expr(Parser* parser); static Expr* parse_list(Parser* parser) { parser_expect(parser, TOKEN_LPAREN); parser_advance(parser); Expr* sexpr = make_sexpr(); while (parser->current.type != TOKEN_RPAREN) { Expr* expr = parse_expr(parser); sexpr_push(sexpr, expr); } parser_expect(parser, TOKEN_RPAREN); parser_advance(parser); return sexpr; } static Expr* parse_expr(Parser* parser) { switch (parser->current.type) { case TOKEN_IDENT: { char* text = strdup(parser->current.text); parser_advance(parser); return make_atom_expr(EXPR_IDENT, text); } case TOKEN_INT: { char* text = strdup(parser->current.text); parser_advance(parser); return make_atom_expr(EXPR_INT, text); } case TOKEN_LPAREN: return parse_list(parser); default: fprintf(stderr, "Unexpected token in expression\n"); exit(EXIT_FAILURE); } } /* ========================= Public API ========================= */ Expr* parse(const char* source) { Parser parser = { .lexer = { .input = source, .pos = 0, }, .current = {0}, }; parser.current = next_token(&parser.lexer); Expr* expr = parse_expr(&parser); if (parser.current.type != TOKEN_EOF) { fprintf(stderr, "Expected EOF\n"); exit(EXIT_FAILURE); } free(parser.current.text); return expr; } /* ========================= Debug Printing ========================= */ static void print_sexpr(Expr* expr) { printf("SExpr [ "); for (size_t i = 0; i < expr->sexpr.count; i++) { expr_print(expr->sexpr.items[i]); if (i + 1 < expr->sexpr.count) { printf(", "); } } printf(" ]"); } void expr_print(Expr* expr) { switch (expr->type) { case EXPR_IDENT: printf("Ident(\"%s\")", expr->text); break; case EXPR_INT: printf("Int(%s)", expr->text); break; case EXPR_SEXPR: print_sexpr(expr); break; } } /* ========================= Memory Cleanup ========================= */ void expr_free(Expr* expr) { switch (expr->type) { case EXPR_IDENT: case EXPR_INT: free(expr->text); break; case EXPR_SEXPR: for (size_t i = 0; i < expr->sexpr.count; i++) { expr_free(expr->sexpr.items[i]); } free(expr->sexpr.items); break; } free(expr); } /* ========================= Unit Tests ========================= */ static void test_simple_list(void) { Expr* expr = parse("(add 2)"); assert(expr->type == EXPR_SEXPR); assert(expr->sexpr.count == 2); assert(expr->sexpr.items[0]->type == EXPR_IDENT); assert(strcmp(expr->sexpr.items[0]->text, "add") == 0); assert(expr->sexpr.items[1]->type == EXPR_INT); assert(strcmp(expr->sexpr.items[1]->text, "2") == 0); expr_free(expr); } static void test_nested_list(void) { Expr* expr = parse("(add 2 (mul 3 4))"); assert(expr->type == EXPR_SEXPR); assert(expr->sexpr.count == 3); Expr* nested = expr->sexpr.items[2]; assert(nested->type == EXPR_SEXPR); assert(nested->sexpr.count == 3); assert(strcmp(nested->sexpr.items[0]->text, "mul") == 0); assert(strcmp(nested->sexpr.items[1]->text, "3") == 0); assert(strcmp(nested->sexpr.items[2]->text, "4") == 0); expr_free(expr); } void test_parse(void) { test_simple_list(); test_nested_list(); }