355 lines
6.7 KiB
C
355 lines
6.7 KiB
C
// parser.c
|
|
#include "parse.h"
|
|
#include <assert.h>
|
|
#include <ctype.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
typedef enum {
|
|
TOKEN_LPAREN,
|
|
TOKEN_RPAREN,
|
|
TOKEN_IDENT,
|
|
TOKEN_INT,
|
|
TOKEN_EOF,
|
|
} TokenType;
|
|
|
|
typedef struct {
|
|
TokenType type;
|
|
char* text;
|
|
} Token;
|
|
|
|
typedef struct {
|
|
const char* input;
|
|
size_t pos;
|
|
} Lexer;
|
|
|
|
typedef struct {
|
|
Lexer lexer;
|
|
Token current;
|
|
} Parser;
|
|
|
|
/* =========================
|
|
Lexer
|
|
========================= */
|
|
|
|
static char current_char(Lexer* lexer)
|
|
{
|
|
return lexer->input[lexer->pos];
|
|
}
|
|
|
|
static void advance(Lexer* lexer)
|
|
{
|
|
if (current_char(lexer) != '\0') {
|
|
lexer->pos++;
|
|
}
|
|
}
|
|
|
|
static void skip_whitespace(Lexer* lexer)
|
|
{
|
|
while (isspace(current_char(lexer))) {
|
|
advance(lexer);
|
|
}
|
|
}
|
|
|
|
static Token make_ident(Lexer* lexer)
|
|
{
|
|
size_t start = lexer->pos;
|
|
|
|
while (isalnum(current_char(lexer)) || current_char(lexer) == '_') {
|
|
advance(lexer);
|
|
}
|
|
|
|
return (Token) {
|
|
.type = TOKEN_IDENT,
|
|
.text = strndup(lexer->input + start, lexer->pos - start),
|
|
};
|
|
}
|
|
|
|
static Token make_int(Lexer* lexer)
|
|
{
|
|
size_t start = lexer->pos;
|
|
|
|
while (isdigit(current_char(lexer))) {
|
|
advance(lexer);
|
|
}
|
|
|
|
return (Token) {
|
|
.type = TOKEN_INT,
|
|
.text = strndup(lexer->input + start, lexer->pos - start),
|
|
};
|
|
}
|
|
|
|
static Token next_token(Lexer* lexer)
|
|
{
|
|
skip_whitespace(lexer);
|
|
|
|
char c = current_char(lexer);
|
|
|
|
switch (c) {
|
|
case '\0':
|
|
return (Token) {
|
|
.type = TOKEN_EOF,
|
|
.text = NULL,
|
|
};
|
|
|
|
case '(':
|
|
advance(lexer);
|
|
|
|
return (Token) {
|
|
.type = TOKEN_LPAREN,
|
|
.text = NULL,
|
|
};
|
|
|
|
case ')':
|
|
advance(lexer);
|
|
|
|
return (Token) {
|
|
.type = TOKEN_RPAREN,
|
|
.text = NULL,
|
|
};
|
|
|
|
default:
|
|
break;
|
|
}
|
|
|
|
if (isalpha(c) || c == '_') {
|
|
return make_ident(lexer);
|
|
}
|
|
|
|
if (isdigit(c)) {
|
|
return make_int(lexer);
|
|
}
|
|
|
|
fprintf(stderr, "Unexpected character: '%c'\n", c);
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
/* =========================
|
|
Parser
|
|
========================= */
|
|
|
|
static void parser_advance(Parser* parser)
|
|
{
|
|
free(parser->current.text);
|
|
parser->current = next_token(&parser->lexer);
|
|
}
|
|
|
|
static void parser_expect(Parser* parser, TokenType expected)
|
|
{
|
|
if (parser->current.type != expected) {
|
|
fprintf(stderr, "Unexpected token\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
static Expr* make_atom_expr(ExprType type, char* text)
|
|
{
|
|
Expr* expr = malloc(sizeof(Expr));
|
|
|
|
expr->type = type;
|
|
expr->text = text;
|
|
|
|
return expr;
|
|
}
|
|
|
|
static Expr* make_sexpr(void)
|
|
{
|
|
Expr* expr = malloc(sizeof(Expr));
|
|
|
|
expr->type = EXPR_SEXPR;
|
|
expr->sexpr.items = NULL;
|
|
expr->sexpr.count = 0;
|
|
|
|
return expr;
|
|
}
|
|
|
|
static void sexpr_push(Expr* sexpr, Expr* item)
|
|
{
|
|
sexpr->sexpr.items
|
|
= realloc(sexpr->sexpr.items, sizeof(Expr*) * (sexpr->sexpr.count + 1));
|
|
|
|
sexpr->sexpr.items[sexpr->sexpr.count++] = item;
|
|
}
|
|
|
|
static Expr* parse_expr(Parser* parser);
|
|
|
|
static Expr* parse_list(Parser* parser)
|
|
{
|
|
parser_expect(parser, TOKEN_LPAREN);
|
|
parser_advance(parser);
|
|
|
|
Expr* sexpr = make_sexpr();
|
|
|
|
while (parser->current.type != TOKEN_RPAREN) {
|
|
Expr* expr = parse_expr(parser);
|
|
sexpr_push(sexpr, expr);
|
|
}
|
|
|
|
parser_expect(parser, TOKEN_RPAREN);
|
|
parser_advance(parser);
|
|
|
|
return sexpr;
|
|
}
|
|
|
|
static Expr* parse_expr(Parser* parser)
|
|
{
|
|
switch (parser->current.type) {
|
|
case TOKEN_IDENT: {
|
|
char* text = strdup(parser->current.text);
|
|
|
|
parser_advance(parser);
|
|
|
|
return make_atom_expr(EXPR_IDENT, text);
|
|
}
|
|
|
|
case TOKEN_INT: {
|
|
char* text = strdup(parser->current.text);
|
|
|
|
parser_advance(parser);
|
|
|
|
return make_atom_expr(EXPR_INT, text);
|
|
}
|
|
|
|
case TOKEN_LPAREN:
|
|
return parse_list(parser);
|
|
|
|
default:
|
|
fprintf(stderr, "Unexpected token in expression\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
}
|
|
|
|
/* =========================
|
|
Public API
|
|
========================= */
|
|
|
|
Expr* parse(const char* source)
|
|
{
|
|
Parser parser = {
|
|
.lexer = {
|
|
.input = source,
|
|
.pos = 0,
|
|
},
|
|
.current = {0},
|
|
};
|
|
|
|
parser.current = next_token(&parser.lexer);
|
|
|
|
Expr* expr = parse_expr(&parser);
|
|
|
|
if (parser.current.type != TOKEN_EOF) {
|
|
fprintf(stderr, "Expected EOF\n");
|
|
exit(EXIT_FAILURE);
|
|
}
|
|
|
|
free(parser.current.text);
|
|
|
|
return expr;
|
|
}
|
|
|
|
/* =========================
|
|
Debug Printing
|
|
========================= */
|
|
|
|
static void print_sexpr(Expr* expr)
|
|
{
|
|
printf("SExpr [ ");
|
|
|
|
for (size_t i = 0; i < expr->sexpr.count; i++) {
|
|
expr_print(expr->sexpr.items[i]);
|
|
|
|
if (i + 1 < expr->sexpr.count) {
|
|
printf(", ");
|
|
}
|
|
}
|
|
|
|
printf(" ]");
|
|
}
|
|
|
|
void expr_print(Expr* expr)
|
|
{
|
|
switch (expr->type) {
|
|
case EXPR_IDENT:
|
|
printf("Ident(\"%s\")", expr->text);
|
|
break;
|
|
|
|
case EXPR_INT:
|
|
printf("Int(%s)", expr->text);
|
|
break;
|
|
|
|
case EXPR_SEXPR:
|
|
print_sexpr(expr);
|
|
break;
|
|
}
|
|
}
|
|
|
|
/* =========================
|
|
Memory Cleanup
|
|
========================= */
|
|
|
|
void expr_free(Expr* expr)
|
|
{
|
|
switch (expr->type) {
|
|
case EXPR_IDENT:
|
|
case EXPR_INT:
|
|
free(expr->text);
|
|
break;
|
|
|
|
case EXPR_SEXPR:
|
|
for (size_t i = 0; i < expr->sexpr.count; i++) {
|
|
expr_free(expr->sexpr.items[i]);
|
|
}
|
|
|
|
free(expr->sexpr.items);
|
|
break;
|
|
}
|
|
|
|
free(expr);
|
|
}
|
|
|
|
/* =========================
|
|
Unit Tests
|
|
========================= */
|
|
|
|
static void test_simple_list(void)
|
|
{
|
|
Expr* expr = parse("(add 2)");
|
|
|
|
assert(expr->type == EXPR_SEXPR);
|
|
assert(expr->sexpr.count == 2);
|
|
|
|
assert(expr->sexpr.items[0]->type == EXPR_IDENT);
|
|
assert(strcmp(expr->sexpr.items[0]->text, "add") == 0);
|
|
|
|
assert(expr->sexpr.items[1]->type == EXPR_INT);
|
|
assert(strcmp(expr->sexpr.items[1]->text, "2") == 0);
|
|
|
|
expr_free(expr);
|
|
}
|
|
|
|
static void test_nested_list(void)
|
|
{
|
|
Expr* expr = parse("(add 2 (mul 3 4))");
|
|
|
|
assert(expr->type == EXPR_SEXPR);
|
|
assert(expr->sexpr.count == 3);
|
|
|
|
Expr* nested = expr->sexpr.items[2];
|
|
|
|
assert(nested->type == EXPR_SEXPR);
|
|
assert(nested->sexpr.count == 3);
|
|
|
|
assert(strcmp(nested->sexpr.items[0]->text, "mul") == 0);
|
|
assert(strcmp(nested->sexpr.items[1]->text, "3") == 0);
|
|
assert(strcmp(nested->sexpr.items[2]->text, "4") == 0);
|
|
|
|
expr_free(expr);
|
|
}
|
|
|
|
void test_parse(void)
|
|
{
|
|
test_simple_list();
|
|
test_nested_list();
|
|
}
|