2026-05-21 00:31:25 +02:00

355 lines
6.7 KiB
C

// parser.c
#include "parse.h"
#include <assert.h>
#include <ctype.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
typedef enum {
TOKEN_LPAREN,
TOKEN_RPAREN,
TOKEN_IDENT,
TOKEN_INT,
TOKEN_EOF,
} TokenType;
typedef struct {
TokenType type;
char* text;
} Token;
typedef struct {
const char* input;
size_t pos;
} Lexer;
typedef struct {
Lexer lexer;
Token current;
} Parser;
/* =========================
Lexer
========================= */
static char current_char(Lexer* lexer)
{
return lexer->input[lexer->pos];
}
static void advance(Lexer* lexer)
{
if (current_char(lexer) != '\0') {
lexer->pos++;
}
}
static void skip_whitespace(Lexer* lexer)
{
while (isspace(current_char(lexer))) {
advance(lexer);
}
}
static Token make_ident(Lexer* lexer)
{
size_t start = lexer->pos;
while (isalnum(current_char(lexer)) || current_char(lexer) == '_') {
advance(lexer);
}
return (Token) {
.type = TOKEN_IDENT,
.text = strndup(lexer->input + start, lexer->pos - start),
};
}
static Token make_int(Lexer* lexer)
{
size_t start = lexer->pos;
while (isdigit(current_char(lexer))) {
advance(lexer);
}
return (Token) {
.type = TOKEN_INT,
.text = strndup(lexer->input + start, lexer->pos - start),
};
}
static Token next_token(Lexer* lexer)
{
skip_whitespace(lexer);
char c = current_char(lexer);
switch (c) {
case '\0':
return (Token) {
.type = TOKEN_EOF,
.text = NULL,
};
case '(':
advance(lexer);
return (Token) {
.type = TOKEN_LPAREN,
.text = NULL,
};
case ')':
advance(lexer);
return (Token) {
.type = TOKEN_RPAREN,
.text = NULL,
};
default:
break;
}
if (isalpha(c) || c == '_') {
return make_ident(lexer);
}
if (isdigit(c)) {
return make_int(lexer);
}
fprintf(stderr, "Unexpected character: '%c'\n", c);
exit(EXIT_FAILURE);
}
/* =========================
Parser
========================= */
static void parser_advance(Parser* parser)
{
free(parser->current.text);
parser->current = next_token(&parser->lexer);
}
static void parser_expect(Parser* parser, TokenType expected)
{
if (parser->current.type != expected) {
fprintf(stderr, "Unexpected token\n");
exit(EXIT_FAILURE);
}
}
static Expr* make_atom_expr(ExprType type, char* text)
{
Expr* expr = malloc(sizeof(Expr));
expr->type = type;
expr->text = text;
return expr;
}
static Expr* make_sexpr(void)
{
Expr* expr = malloc(sizeof(Expr));
expr->type = EXPR_SEXPR;
expr->sexpr.items = NULL;
expr->sexpr.count = 0;
return expr;
}
static void sexpr_push(Expr* sexpr, Expr* item)
{
sexpr->sexpr.items
= realloc(sexpr->sexpr.items, sizeof(Expr*) * (sexpr->sexpr.count + 1));
sexpr->sexpr.items[sexpr->sexpr.count++] = item;
}
static Expr* parse_expr(Parser* parser);
static Expr* parse_list(Parser* parser)
{
parser_expect(parser, TOKEN_LPAREN);
parser_advance(parser);
Expr* sexpr = make_sexpr();
while (parser->current.type != TOKEN_RPAREN) {
Expr* expr = parse_expr(parser);
sexpr_push(sexpr, expr);
}
parser_expect(parser, TOKEN_RPAREN);
parser_advance(parser);
return sexpr;
}
static Expr* parse_expr(Parser* parser)
{
switch (parser->current.type) {
case TOKEN_IDENT: {
char* text = strdup(parser->current.text);
parser_advance(parser);
return make_atom_expr(EXPR_IDENT, text);
}
case TOKEN_INT: {
char* text = strdup(parser->current.text);
parser_advance(parser);
return make_atom_expr(EXPR_INT, text);
}
case TOKEN_LPAREN:
return parse_list(parser);
default:
fprintf(stderr, "Unexpected token in expression\n");
exit(EXIT_FAILURE);
}
}
/* =========================
Public API
========================= */
Expr* parse(const char* source)
{
Parser parser = {
.lexer = {
.input = source,
.pos = 0,
},
.current = {0},
};
parser.current = next_token(&parser.lexer);
Expr* expr = parse_expr(&parser);
if (parser.current.type != TOKEN_EOF) {
fprintf(stderr, "Expected EOF\n");
exit(EXIT_FAILURE);
}
free(parser.current.text);
return expr;
}
/* =========================
Debug Printing
========================= */
static void print_sexpr(Expr* expr)
{
printf("SExpr [ ");
for (size_t i = 0; i < expr->sexpr.count; i++) {
expr_print(expr->sexpr.items[i]);
if (i + 1 < expr->sexpr.count) {
printf(", ");
}
}
printf(" ]");
}
void expr_print(Expr* expr)
{
switch (expr->type) {
case EXPR_IDENT:
printf("Ident(\"%s\")", expr->text);
break;
case EXPR_INT:
printf("Int(%s)", expr->text);
break;
case EXPR_SEXPR:
print_sexpr(expr);
break;
}
}
/* =========================
Memory Cleanup
========================= */
void expr_free(Expr* expr)
{
switch (expr->type) {
case EXPR_IDENT:
case EXPR_INT:
free(expr->text);
break;
case EXPR_SEXPR:
for (size_t i = 0; i < expr->sexpr.count; i++) {
expr_free(expr->sexpr.items[i]);
}
free(expr->sexpr.items);
break;
}
free(expr);
}
/* =========================
Unit Tests
========================= */
static void test_simple_list(void)
{
Expr* expr = parse("(add 2)");
assert(expr->type == EXPR_SEXPR);
assert(expr->sexpr.count == 2);
assert(expr->sexpr.items[0]->type == EXPR_IDENT);
assert(strcmp(expr->sexpr.items[0]->text, "add") == 0);
assert(expr->sexpr.items[1]->type == EXPR_INT);
assert(strcmp(expr->sexpr.items[1]->text, "2") == 0);
expr_free(expr);
}
static void test_nested_list(void)
{
Expr* expr = parse("(add 2 (mul 3 4))");
assert(expr->type == EXPR_SEXPR);
assert(expr->sexpr.count == 3);
Expr* nested = expr->sexpr.items[2];
assert(nested->type == EXPR_SEXPR);
assert(nested->sexpr.count == 3);
assert(strcmp(nested->sexpr.items[0]->text, "mul") == 0);
assert(strcmp(nested->sexpr.items[1]->text, "3") == 0);
assert(strcmp(nested->sexpr.items[2]->text, "4") == 0);
expr_free(expr);
}
void test_parse(void)
{
test_simple_list();
test_nested_list();
}