#include "collections.h"
#include "json.h"

#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/* Position of a token inside the input text. */
struct loc {
    size_t idx; /* byte offset into the text */
    int line;   /* 1-based line number */
    int col;    /* 1-based column number */
};

/*
 * Print a diagnostic to stderr.
 *
 * The first line is "error: <message>". When `text` is non-NULL the source
 * line containing `loc` is printed below it with a caret under column
 * `loc.col`. `loc.idx` may equal `text_len` (error at end of input); the
 * index is clamped so no byte outside [0, text_len) is ever read.
 */
static void report(
    struct loc loc, char const *message, char const *text, size_t text_len)
{
    fprintf(stderr, "error: %s\n", message);
    if (!text) {
        return;
    }

    size_t idx = loc.idx < text_len ? loc.idx : text_len;

    /* First byte of the line: just after the previous '\n', or 0. */
    size_t line_begin = idx;
    while (line_begin > 0 && text[line_begin - 1] != '\n') {
        line_begin -= 1;
    }

    /* One past the last byte of the line. */
    size_t line_end = idx;
    while (line_end < text_len && text[line_end] != '\n') {
        line_end += 1;
    }

    int linenr_width = snprintf(NULL, 0, "%d", loc.line);
    int caret_pad = loc.col > 1 ? loc.col - 1 : 0;

    /* "%*s" with an empty string pads with exactly `width` spaces. */
    fprintf(stderr,
        "%*s|\n"
        "%d|%.*s\n"
        "%*s|%*s^\n"
        "%*s|\n",
        linenr_width, "",
        loc.line, (int)(line_end - line_begin), &text[line_begin],
        linenr_width, "", caret_pad, "",
        linenr_width, "");
}

/*
 * Token kinds. Punctuation tokens use their own character as value, and
 * the literal kinds reuse the json_type constants so that parser_parse can
 * cast them directly.
 * NOTE(review): tt_eof, tt_string and tt_float get implicit values that
 * depend on json_null/json_true from json.h - confirm they cannot collide
 * with each other or with the json_* constants.
 */
enum tokty {
    tt_eof,
    tt_null = json_null,
    tt_false = json_false,
    tt_true = json_true,
    tt_string,
    tt_float,
    tt_int = '0',
    tt_comma = ',',
    tt_colon = ':',
    tt_lbracket = '[',
    tt_rbracket = ']',
    tt_lbrace = '{',
    tt_rbrace = '}',
};

/* A token: its kind, the slice of input it covers, and where it starts. */
struct tok {
    enum tokty ty;
    char const *ptr;
    size_t len;
    struct loc loc;
};

/* Tokenizer state: the input buffer plus the current read position. */
struct tokenizer {
    char const *text;
    size_t len;
    size_t idx;
    int line;
    int col;
    bool failed; /* set once any lexical error has been reported */
};

/* Advance one byte, keeping line/column in sync. No-op at end of input. */
static void t_step(struct tokenizer *t)
{
    if (t->idx >= t->len) {
        return;
    }
    if (t->text[t->idx] == '\n') {
        t->line += 1;
        t->col = 1;
    } else {
        t->col += 1;
    }
    t->idx += 1;
}

/* Build a token of kind `ty` covering [loc.idx, t->idx). */
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
{
    return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
}

static struct tok tokenizer_next(struct tokenizer *t);

/* True when `c` is an ASCII decimal digit. */
static bool t_is_digit(char c)
{
    return c >= '0' && c <= '9';
}

/* Consume a (possibly empty) run of decimal digits. */
static void t_skip_digits(struct tokenizer *t, size_t *i)
{
    while (*i < t->len && t_is_digit(t->text[*i])) {
        t_step(t);
    }
}

/*
 * Classify the lowercase word [loc.idx, *i) as one of the keywords
 * null/false/true. The word must match a keyword exactly (same length,
 * same bytes). Anything else is reported as an error and the following
 * token is returned instead.
 */
static struct tok t_make_ident_tok(
    struct tokenizer *t, struct loc loc, size_t *i)
{
    static char const *const kws[] = { "null", "false", "true" };
    static enum tokty const tys[] = { tt_null, tt_false, tt_true };

    size_t word_len = *i - loc.idx;
    for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
        if (strlen(kws[kw_i]) == word_len
            && memcmp(kws[kw_i], &t->text[loc.idx], word_len) == 0) {
            return t_tok(t, tys[kw_i], loc);
        }
    }

    report(loc, "invalid identifier", t->text, t->len);
    t->failed = true;
    return tokenizer_next(t);
}

/*
 * Tokenize a number starting at *i: optional '-', an integer part (a lone
 * '0' or a run of digits), an optional fraction and an optional exponent.
 * Yields tt_float when a fraction or exponent is present, tt_int otherwise.
 * A leading '0' ends the integer part, so "01" is the token "0" followed
 * by the token "1".
 */
static struct tok t_make_number_tok(
    struct tokenizer *t, struct loc loc, size_t *i)
{
    enum tokty ty = tt_int;

    if (*i < t->len && t->text[*i] == '-') {
        t_step(t);
    }

    if (*i < t->len && t->text[*i] == '0') {
        t_step(t);
    } else {
        t_skip_digits(t, i);
    }

    if (*i < t->len && t->text[*i] == '.') {
        ty = tt_float;
        t_step(t);
        t_skip_digits(t, i);
    }

    if (*i < t->len && (t->text[*i] == 'e' || t->text[*i] == 'E')) {
        /* Only take the exponent when at least one digit follows it. */
        size_t j = *i + 1;
        if (j < t->len && (t->text[j] == '+' || t->text[j] == '-')) {
            j += 1;
        }
        if (j < t->len && t_is_digit(t->text[j])) {
            ty = tt_float;
            while (*i < j) {
                t_step(t);
            }
            t_skip_digits(t, i);
        }
    }

    return t_tok(t, ty, loc);
}

/*
 * Tokenize a double-quoted string starting at *i. The token spans both
 * quotes. A backslash makes the following byte part of the string without
 * interpreting it. An unterminated string is reported and the following
 * token (end of input) is returned instead.
 */
static struct tok t_make_string_tok(
    struct tokenizer *t, struct loc loc, size_t *i)
{
    t_step(t); /* opening quote */

    while (*i < t->len && t->text[*i] != '\"') {
        if (t->text[*i] == '\\') {
            t_step(t);
            if (*i >= t->len) {
                break;
            }
        }
        t_step(t);
    }

    /* The loop ends either on the closing quote or at end of input. */
    if (*i >= t->len) {
        report(loc, "malformed string", t->text, t->len);
        t->failed = true;
        return tokenizer_next(t);
    }

    t_step(t); /* closing quote */
    return t_tok(t, tt_string, loc);
}

/*
 * Return the next token, or a tt_eof token at end of input. Illegal
 * characters are reported, flagged in t->failed and skipped.
 */
static struct tok tokenizer_next(struct tokenizer *t)
{
    size_t *i = &t->idx;

    for (;;) {
        /*
         * Skip whitespace. strchr also matches the terminator of its first
         * argument, so NUL bytes inside the buffer are skipped here too.
         */
        while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
            t_step(t);
        }

        struct loc loc = { t->idx, t->line, t->col };
        if (*i >= t->len) {
            return t_tok(t, tt_eof, loc);
        }

        char c = t->text[*i];

        /* Punctuation tokens use the character itself as token kind. */
        if (strchr(",:[]{}", c) != NULL) {
            t_step(t);
            return t_tok(t, (enum tokty)c, loc);
        }

        if (c >= 'a' && c <= 'z') {
            while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
                t_step(t);
            }
            return t_make_ident_tok(t, loc, i);
        }

        bool digit_follows = *i + 1 < t->len && t_is_digit(t->text[*i + 1]);
        if (t_is_digit(c) || (c == '-' && digit_follows)) {
            return t_make_number_tok(t, loc, i);
        }

        if (c == '\"') {
            return t_make_string_tok(t, loc, i);
        }

        report(loc, "illegal character", t->text, t->len);
        t->failed = true;
        t_step(t);
    }
}

/* Parser state: the tokenizer, one token of lookahead, and an allocator. */
struct parser {
    struct tokenizer tokenizer;
    struct tok tok;
    struct blockalloc allocator;
};

/* Replace the lookahead token with the next one from the tokenizer. */
static void p_step(struct parser *p)
{
    p->tok = tokenizer_next(&p->tokenizer);
}

/* Initialise `p` for `text` (line 1, column 1) and load the first token. */
static void parser_construct(
    struct parser *p, char const *text, size_t text_len)
{
    *p = (struct parser) {
        .tokenizer = (struct tokenizer) { text, text_len, 0, 1, 1, false },
        .tok = (struct tok) { 0 },
        .allocator = (struct blockalloc) { 0 },
    };
    blockalloc_construct(&p->allocator);
    p_step(p);
}

/* Report `message` at `loc` using the parser's input text. */
static void p_report(struct parser *p, struct loc loc, char const *message)
{
    report(loc, message, p->tokenizer.text, p->tokenizer.len);
}

/*
 * Return a NUL-terminated heap copy of the current token's text, or NULL
 * when allocation fails. The caller frees it. Needed because the input
 * buffer is not guaranteed to be NUL-terminated, so strtoll/strtod must
 * not read from it directly.
 */
static char *p_tok_dup(struct parser *p)
{
    char *copy = malloc(p->tok.len + 1);
    if (!copy) {
        return NULL;
    }
    memcpy(copy, p->tok.ptr, p->tok.len);
    copy[p->tok.len] = '\0';
    return copy;
}

static struct json_value *parser_parse(struct parser *p);

/*
 * Parse an array; the lookahead is the opening '['. `ty` points at the
 * parser's lookahead kind. Elements are separated by ','; a trailing comma
 * is an error. Returns the array, or NULL after reporting an error.
 */
static struct json_value *parser_parse_array(struct parser *p, enum tokty *ty)
{
    p_step(p); /* '[' */
    struct json_value *val = json_new(json_array);

    /* The first element needs no separator; later ones need a ','. */
    bool tail = false;
    while (*ty != tt_eof && (tail ? *ty == ',' : *ty != ']')) {
        if (tail) {
            p_step(p); /* ',' */
        }
        struct json_value *child = parser_parse(p);
        if (!child) {
            goto array_leave_error_free_val;
        }
        json_push(val, child);
        tail = true;
    }

    if (*ty != ']') {
        p_report(p, p->tok.loc, "expected ']'");
        goto array_leave_error_free_val;
    }
    p_step(p); /* ']' */
    return val;

array_leave_error_free_val:
    json_free(val);
    return NULL;
}

/*
 * Parse an object; the lookahead is the opening '{'. `ty` points at the
 * parser's lookahead kind. Members have the form "key": value and are
 * separated by ','. Returns the object, or NULL after reporting an error.
 */
static struct json_value *parser_parse_object(struct parser *p, enum tokty *ty)
{
    p_step(p); /* '{' */
    struct json_value *val = json_new(json_object);

    /* The first member needs no separator; later ones need a ','. */
    bool tail = false;
    while (*ty != tt_eof && (tail ? *ty == ',' : *ty != '}')) {
        if (tail) {
            p_step(p); /* ',' */
        }

        if (*ty != tt_string) {
            p_report(p, p->tok.loc, "expected string");
            goto object_leave_error_free_val;
        }
        struct tok key_tok = p->tok;
        p_step(p);

        if (*ty != ':') {
            p_report(p, p->tok.loc, "expected ':'");
            goto object_leave_error_free_val;
        }
        p_step(p);

        struct json_value *child = parser_parse(p);
        if (!child) {
            goto object_leave_error_free_val;
        }

        /* Strip the surrounding quotes from the key token. */
        json_set_sized(val, key_tok.ptr + 1, key_tok.len - 2, child);
        tail = true;
    }

    if (*ty != '}') {
        p_report(p, p->tok.loc, "expected '}'");
        goto object_leave_error_free_val;
    }
    p_step(p); /* '}' */
    return val;

object_leave_error_free_val:
    json_free(val);
    return NULL;
}

/*
 * Parse one JSON value starting at the lookahead token and advance past
 * it. Returns the value, or NULL after reporting an error.
 */
static struct json_value *parser_parse(struct parser *p)
{
    struct loc loc = p->tok.loc;
    enum tokty *ty = &p->tok.ty;

    if (*ty == tt_null || *ty == tt_false || *ty == tt_true) {
        /* These token kinds share their values with json_type. */
        struct json_value *val = json_new((enum json_type)*ty);
        p_step(p);
        return val;
    }

    if (*ty == tt_int) {
        char *digits = p_tok_dup(p);
        if (!digits) {
            p_report(p, loc, "out of memory");
            return NULL;
        }
        errno = 0;
        long long parsed = strtoll(digits, NULL, 10);
        bool out_of_range =
            errno == ERANGE || parsed > INT64_MAX || parsed < INT64_MIN;
        free(digits);
        if (out_of_range) {
            p_report(p, loc, "integer out of range");
            return NULL;
        }

        struct json_value *val = json_new(json_int);
        json_set_int(val, (int64_t)parsed);
        p_step(p);
        return val;
    }

    if (*ty == tt_float) {
        char *digits = p_tok_dup(p);
        if (!digits) {
            p_report(p, loc, "out of memory");
            return NULL;
        }
        double value = strtod(digits, NULL);
        free(digits);

        struct json_value *val = json_new(json_float);
        json_set_float(val, value);
        p_step(p);
        return val;
    }

    if (*ty == tt_string) {
        /*
         * Copy the token without its quotes. Escape sequences are copied
         * verbatim, not decoded.
         * NOTE(review): the third blockalloc_alloc argument is presumably
         * an alignment, and the storage presumably has to outlive the
         * returned value unless json_set_string copies - confirm against
         * collections.h / json.h.
         */
        size_t body_len = p->tok.len - 2;
        char *value = blockalloc_alloc(&p->allocator, p->tok.len - 2 + 1, 2);
        memcpy(value, p->tok.ptr + 1, body_len);
        value[body_len] = '\0';

        struct json_value *val = json_new(json_string);
        json_set_string(val, value);
        p_step(p);
        return val;
    }

    if (*ty == '[') {
        return parser_parse_array(p, ty);
    }
    if (*ty == '{') {
        return parser_parse_object(p, ty);
    }

    p_report(p, loc, "expected expression");
    return NULL;
}

/*
 * Parse `text` as a single JSON document.
 *
 * text      - input buffer; NULL yields NULL.
 * text_size - number of bytes to parse; 0 means "use strlen(text)".
 *
 * Returns the parsed value, or NULL when the input is malformed: a syntax
 * error, any lexical error reported while tokenizing, or extra tokens
 * after the top-level value. Diagnostics are written to stderr.
 * NOTE(review): the parser's block allocator is never released here; it
 * presumably backs the strings of the returned value - confirm ownership.
 */
struct json_value *json_parse(char const *text, size_t text_size)
{
    if (!text) {
        return NULL;
    }
    text_size = text_size > 0 ? text_size : strlen(text);

    struct parser p;
    parser_construct(&p, text, text_size);

    struct json_value *val = parser_parse(&p);

    if (val && p.tok.ty != tt_eof) {
        p_report(&p, p.tok.loc, "expected end of input");
        json_free(val);
        val = NULL;
    }

    /* Lexical errors were already reported; do not hand back a value. */
    if (val && p.tokenizer.failed) {
        json_free(val);
        val = NULL;
    }

    return val;
}