340 lines
8.7 KiB
C
340 lines
8.7 KiB
C
#include "collections.h"
|
|
#include "json.h"
|
|
#include <assert.h>
|
|
#include <stdbool.h>
|
|
#include <stddef.h>
|
|
#include <stdint.h>
|
|
#include <stdio.h>
|
|
#include <stdlib.h>
|
|
#include <string.h>
|
|
|
|
/*
 * A position inside the source text.
 *
 * Field order matters: the tokenizer builds values positionally as
 * { idx, line, col }.
 */
struct loc {
    size_t idx;    /* offset into the text, used to index it directly */
    int line, col; /* line and column as tracked by the tokenizer (start at 1) */
};
|
|
|
|
static void report(
|
|
struct loc loc, char const *message, char const *text, size_t text_len)
|
|
{
|
|
fprintf(stderr, "error: %s\n", message);
|
|
if (!text)
|
|
return;
|
|
assert(text[loc.idx] != '\n');
|
|
size_t line_begin_idx = loc.idx;
|
|
while (line_begin_idx > 0 && text[line_begin_idx] != '\n') {
|
|
line_begin_idx -= 1;
|
|
}
|
|
if (text[line_begin_idx] == '\n') {
|
|
line_begin_idx += 1;
|
|
}
|
|
size_t line_end_idx = loc.idx + 1;
|
|
while (line_end_idx < text_len && text[line_end_idx] != '\n') {
|
|
line_end_idx += 1;
|
|
}
|
|
if (line_end_idx >= text_len || text[line_end_idx] == '\n') {
|
|
line_end_idx -= 1;
|
|
}
|
|
int linenr_width = snprintf(NULL, 0, "%d", loc.line);
|
|
static char const *spaces = " ";
|
|
printf("%.*s|\n"
|
|
"%d|%.*s\n"
|
|
"%.*s|%.*s^\n"
|
|
"%.*s|\n",
|
|
linenr_width,
|
|
spaces,
|
|
loc.line,
|
|
(int)(line_end_idx - line_begin_idx + 1),
|
|
&text[line_begin_idx],
|
|
linenr_width,
|
|
spaces,
|
|
loc.col - 1,
|
|
spaces,
|
|
linenr_width,
|
|
spaces);
|
|
}
|
|
|
|
enum tokty {
|
|
tt_eof,
|
|
tt_null = json_null,
|
|
tt_false = json_false,
|
|
tt_true = json_true,
|
|
tt_string,
|
|
tt_float,
|
|
tt_int = '0',
|
|
tt_comma = ',',
|
|
tt_colon = ':',
|
|
tt_lbracket = '[',
|
|
tt_rbracket = ']',
|
|
tt_lbrace = '{',
|
|
tt_rbrace = '}',
|
|
};
|
|
|
|
struct tok {
|
|
enum tokty ty;
|
|
char const *ptr;
|
|
size_t len;
|
|
struct loc loc;
|
|
};
|
|
|
|
/*
 * Tokenizer state over a text buffer.
 *
 * Field order matters: parser_construct initializes this positionally as
 * { text, len, idx, line, col, failed }.
 */
struct tokenizer {
    char const *text; /* source buffer being tokenized */
    size_t len;       /* number of characters in text */
    size_t idx;       /* index of the next character to read */
    int line;         /* current line, advanced on '\n' */
    int col;          /* current column, reset to 1 after '\n' */
    bool failed;      /* set once any tokenizer error has been reported */
};
|
|
|
|
static void t_step(struct tokenizer *t)
|
|
{
|
|
if (t->idx >= t->len)
|
|
return;
|
|
if (t->text[t->idx] == '\n') {
|
|
t->line += 1;
|
|
t->col = 1;
|
|
} else {
|
|
t->col += 1;
|
|
}
|
|
t->idx += 1;
|
|
}
|
|
|
|
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
|
|
{
|
|
return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
|
|
}
|
|
|
|
/*
 * Forward declaration: t_make_ident_tok and t_make_string_tok call back into
 * tokenizer_next to continue after reporting an error.
 */
static struct tok tokenizer_next(struct tokenizer *t);
|
|
|
|
static struct tok t_make_ident_tok(
|
|
struct tokenizer *t, struct loc loc, size_t *i)
|
|
{
|
|
char const *kws[] = { "null", "false", "true" };
|
|
enum tokty tys[] = { tt_null, tt_false, tt_true };
|
|
for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
|
|
if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) {
|
|
return t_tok(t, tys[kw_i], loc);
|
|
}
|
|
}
|
|
report(loc, "invalid identifier", t->text, t->len);
|
|
t->failed = true;
|
|
return tokenizer_next(t);
|
|
}
|
|
|
|
static struct tok t_make_number_tok(
|
|
struct tokenizer *t, struct loc loc, size_t *i)
|
|
{
|
|
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
|
t_step(t);
|
|
}
|
|
enum tokty ty = tt_int;
|
|
if (*i < t->len && t->text[*i] == '.') {
|
|
ty = tt_float;
|
|
t_step(t);
|
|
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
|
t_step(t);
|
|
}
|
|
}
|
|
return t_tok(t, ty, loc);
|
|
}
|
|
|
|
static struct tok t_make_string_tok(
|
|
struct tokenizer *t, struct loc loc, size_t *i)
|
|
{
|
|
t_step(t);
|
|
while (*i < t->len && t->text[*i] != '\"') {
|
|
if (t->text[*i] == '\\') {
|
|
t_step(t);
|
|
if (*i >= t->len)
|
|
break;
|
|
}
|
|
t_step(t);
|
|
}
|
|
if (*i >= t->len && t->text[*i] != '\"') {
|
|
report(loc, "malformed string", t->text, t->len);
|
|
t->failed = true;
|
|
return tokenizer_next(t);
|
|
}
|
|
t_step(t);
|
|
return t_tok(t, tt_string, loc);
|
|
}
|
|
|
|
static struct tok tokenizer_next(struct tokenizer *t)
|
|
{
|
|
struct loc loc = { t->idx, t->line, t->col };
|
|
size_t *i = &t->idx;
|
|
if (*i >= t->len) {
|
|
return t_tok(t, tt_eof, loc);
|
|
}
|
|
bool matched = false;
|
|
while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
|
|
matched = true;
|
|
t_step(t);
|
|
}
|
|
if (matched) {
|
|
return tokenizer_next(t);
|
|
}
|
|
if (strchr(",:[]{}0", t->text[*i]) != NULL) {
|
|
enum tokty ty = (enum tokty)t->text[*i];
|
|
t_step(t);
|
|
return t_tok(t, ty, loc);
|
|
}
|
|
while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
|
|
matched = true;
|
|
t_step(t);
|
|
}
|
|
if (matched) {
|
|
return t_make_ident_tok(t, loc, i);
|
|
}
|
|
if (t->text[*i] >= '1' && t->text[*i] <= '9') {
|
|
return t_make_number_tok(t, loc, i);
|
|
}
|
|
if (t->text[*i] == '\"') {
|
|
return t_make_string_tok(t, loc, i);
|
|
}
|
|
report(loc, "illegal character", t->text, t->len);
|
|
t->failed = true;
|
|
t_step(t);
|
|
return tokenizer_next(t);
|
|
}
|
|
|
|
struct parser {
|
|
struct tokenizer tokenizer;
|
|
struct tok tok;
|
|
struct blockalloc allocator;
|
|
};
|
|
|
|
static void p_step(struct parser *p)
|
|
{
|
|
p->tok = tokenizer_next(&p->tokenizer);
|
|
}
|
|
|
|
static void parser_construct(
|
|
struct parser *p, char const *text, size_t text_len)
|
|
{
|
|
*p = (struct parser) {
|
|
.tokenizer = (struct tokenizer) { text, text_len, 0, 1, 1, false },
|
|
.tok = (struct tok) { 0 },
|
|
.allocator = (struct blockalloc) { 0 },
|
|
};
|
|
blockalloc_construct(&p->allocator);
|
|
p_step(p);
|
|
}
|
|
|
|
static void p_report(struct parser *p, struct loc loc, char const *message)
|
|
{
|
|
report(loc, message, p->tokenizer.text, p->tokenizer.len);
|
|
}
|
|
|
|
/*
 * Forward declaration: parser_parse_array and parser_parse_object call
 * parser_parse for the values nested inside them.
 */
static struct json_value *parser_parse(struct parser *p);
|
|
|
|
static struct json_value *parser_parse_array(struct parser *p, enum tokty *ty)
|
|
{
|
|
p_step(p);
|
|
struct json_value *val = json_new(json_array);
|
|
bool tail = false;
|
|
while (*ty != tt_eof && ((!tail && *ty != ']') || (tail && *ty == ','))) {
|
|
if (tail)
|
|
p_step(p);
|
|
struct json_value *child = parser_parse(p);
|
|
if (!child)
|
|
goto array_leave_error_free_val;
|
|
json_push(val, child);
|
|
tail = true;
|
|
}
|
|
if (*ty == tt_eof || *ty != ']') {
|
|
p_report(p, p->tok.loc, "expected ']'");
|
|
goto array_leave_error_free_val;
|
|
}
|
|
p_step(p);
|
|
return val;
|
|
array_leave_error_free_val:
|
|
json_free(val);
|
|
return NULL;
|
|
}
|
|
|
|
static struct json_value *parser_parse_object(struct parser *p, enum tokty *ty)
|
|
{
|
|
p_step(p);
|
|
struct json_value *val = json_new(json_object);
|
|
bool tail = false;
|
|
while (*ty != tt_eof && ((!tail && *ty != '}') || (tail && *ty == ','))) {
|
|
if (tail)
|
|
p_step(p);
|
|
if (*ty != tt_string) {
|
|
p_report(p, p->tok.loc, "expected string");
|
|
goto object_leave_error_free_val;
|
|
}
|
|
struct tok key_tok = p->tok;
|
|
p_step(p);
|
|
if (*ty != ':') {
|
|
p_report(p, p->tok.loc, "expected ':'");
|
|
goto object_leave_error_free_val;
|
|
}
|
|
p_step(p);
|
|
struct json_value *child = parser_parse(p);
|
|
if (!child)
|
|
goto object_leave_error_free_val;
|
|
json_set_sized(val, key_tok.ptr + 1, key_tok.len - 2, child);
|
|
tail = true;
|
|
}
|
|
if (*ty == tt_eof || *ty != '}') {
|
|
p_report(p, p->tok.loc, "expected '}'");
|
|
goto object_leave_error_free_val;
|
|
}
|
|
p_step(p);
|
|
return val;
|
|
object_leave_error_free_val:
|
|
json_free(val);
|
|
return NULL;
|
|
}
|
|
|
|
static struct json_value *parser_parse(struct parser *p)
|
|
{
|
|
struct loc loc = p->tok.loc;
|
|
enum tokty *ty = &p->tok.ty;
|
|
|
|
if (*ty == tt_null || *ty == tt_false || *ty == tt_true) {
|
|
struct json_value *val = json_new((enum json_type) * ty);
|
|
p_step(p);
|
|
return val;
|
|
} else if (*ty == tt_int) {
|
|
int64_t value = strtol(p->tok.ptr, NULL, 10);
|
|
struct json_value *val = json_new(json_int);
|
|
json_set_int(val, value);
|
|
p_step(p);
|
|
return val;
|
|
} else if (*ty == tt_float) {
|
|
double value = strtod(p->tok.ptr, NULL);
|
|
struct json_value *val = json_new(json_float);
|
|
json_set_float(val, value);
|
|
p_step(p);
|
|
return val;
|
|
} else if (*ty == tt_string) {
|
|
char *value = blockalloc_alloc(&p->allocator, p->tok.len - 2 + 1, 2);
|
|
strncpy(value, p->tok.ptr + 1, p->tok.len - 2);
|
|
value[p->tok.len - 2] = '\0';
|
|
struct json_value *val = json_new(json_string);
|
|
json_set_string(val, value);
|
|
p_step(p);
|
|
return val;
|
|
} else if (*ty == '[') {
|
|
return parser_parse_array(p, ty);
|
|
} else if (*ty == '{') {
|
|
return parser_parse_object(p, ty);
|
|
} else {
|
|
p_report(p, loc, "expected expression");
|
|
return NULL;
|
|
}
|
|
}
|
|
|
|
struct json_value *json_parse(char const *text, size_t text_size)
|
|
{
|
|
text_size = text_size > 0 ? text_size : strlen(text);
|
|
|
|
struct parser p;
|
|
parser_construct(&p, text, text_size);
|
|
|
|
return parser_parse(&p);
|
|
}
|