refactor json
This commit is contained in:
parent
e36f1e1b81
commit
902125750b
2
Makefile
2
Makefile
@ -11,6 +11,8 @@ obj_dir = $(build_dir)/obj
|
||||
|
||||
sources = \
|
||||
src/main.c \
|
||||
src/json_parse.c \
|
||||
src/json_value.c \
|
||||
src/collections.c
|
||||
|
||||
target=$(build_dir)/jq
|
||||
|
||||
@ -6,6 +6,13 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Smaller of A and B. Each argument may be evaluated twice.
#define MIN(A, B) ((A) <= (B) ? (A) : (B))

// Larger of A and B. Each argument may be evaluated twice.
#define MAX(A, B) ((A) >= (B) ? (A) : (B))

// Round VAL up to the next multiple of BOUNDARY (VAL is returned unchanged
// when it already is a multiple). BOUNDARY must be non-zero.
#define ALIGN(VAL, BOUNDARY) \
    ((VAL) % (BOUNDARY) != 0 ? (VAL) + ((BOUNDARY) - (VAL) % (BOUNDARY)) \
                             : (VAL))
|
||||
|
||||
void *array_push(void **data,
|
||||
size_t *capacity,
|
||||
size_t *count,
|
||||
@ -51,6 +58,90 @@ void *array_insert_at(void **data,
|
||||
return src_ptr;
|
||||
}
|
||||
|
||||
void smallarray_construct(struct smallarray *a)
|
||||
{
|
||||
*a = (struct smallarray) { 0 };
|
||||
}
|
||||
|
||||
void smallarray_destroy(struct smallarray *a)
|
||||
{
|
||||
if ((a->smalldata[0] & 1) == 0 && a->data) {
|
||||
free(a->data);
|
||||
}
|
||||
}
|
||||
|
||||
static bool sa_is_big(struct smallarray const *a)
|
||||
{
|
||||
return a->data && (a->smalldata[0] & 1) == 0;
|
||||
}
|
||||
|
||||
static void sa_push_big(struct smallarray *a, void *value)
|
||||
{
|
||||
size_t capacity = 8;
|
||||
void **ptr = malloc(capacity * sizeof(void *));
|
||||
size_t count = 0;
|
||||
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if ((a->smalldata[i] & 1) == 0)
|
||||
break;
|
||||
ptr[i] = (void *)(a->smalldata[i] & ~1ull);
|
||||
count += 1;
|
||||
}
|
||||
|
||||
a->capacity = capacity;
|
||||
a->data = ptr;
|
||||
a->count = count;
|
||||
array_push(
|
||||
(void **)&a->data, &a->capacity, &a->count, &value, sizeof(void *));
|
||||
}
|
||||
|
||||
static void sa_push_small(struct smallarray *a, void *value)
|
||||
{
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if (a->smalldata[i] & 1)
|
||||
continue;
|
||||
a->smalldata[i] = (size_t)value | 1;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
void smallarray_push(struct smallarray *a, void *value)
|
||||
{
|
||||
assert((size_t)value % 2 == 0 && "pointer must be 2 bytes aligned");
|
||||
|
||||
if (sa_is_big(a)) {
|
||||
sa_push_big(a, value);
|
||||
} else {
|
||||
sa_push_small(a, value);
|
||||
}
|
||||
}
|
||||
|
||||
size_t smallarray_count(struct smallarray const *a)
|
||||
{
|
||||
if (sa_is_big(a)) {
|
||||
return a->count;
|
||||
} else {
|
||||
size_t count = 0;
|
||||
for (size_t i = 0; i < 3; ++i) {
|
||||
if ((a->smalldata[i] & 1) == 0)
|
||||
break;
|
||||
count += 1;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
}
|
||||
|
||||
void *smallarray_get(struct smallarray *a, size_t idx)
|
||||
{
|
||||
if (sa_is_big(a)) {
|
||||
return a->data[idx];
|
||||
} else {
|
||||
if (idx >= 3 || (a->smalldata[idx] & 1) == 0)
|
||||
return NULL;
|
||||
return (void *)(a->smalldata[idx] & ~1ull);
|
||||
}
|
||||
}
|
||||
|
||||
static uint64_t hash_key(char const *data)
|
||||
{
|
||||
// djb2
|
||||
@ -65,6 +156,20 @@ static uint64_t hash_key(char const *data)
|
||||
return hash;
|
||||
}
|
||||
|
||||
// djb2 hash over at most `size` bytes of `data`.
// Hashing also stops at the first NUL byte, so a NUL-terminated key hashes
// the same whether or not its length is supplied.
static uint64_t hash_key_sized(char const *data, size_t size)
{
    uint64_t h = 5381;

    for (size_t i = 0; i < size; ++i) {
        unsigned char const byte = (unsigned char)data[i];
        if (byte == 0)
            break;
        h = h * 33 + byte;
    }

    return h;
}
|
||||
|
||||
void hashmap_construct(struct hashmap *t)
|
||||
{
|
||||
*t = (struct hashmap) { NULL, 0, 0 };
|
||||
@ -79,18 +184,18 @@ void hashmap_destroy(struct hashmap *t)
|
||||
|
||||
static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash)
|
||||
{
|
||||
for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) {
|
||||
struct hash_bucket *bucket = &m->buckets[b_idx];
|
||||
if (hash < bucket->first_hash || hash > bucket->last_hash) {
|
||||
for (size_t i = 0; i < m->buckets_count; ++i) {
|
||||
struct hash_bucket *bucket = &m->buckets[i];
|
||||
if (hash < bucket->first_hash)
|
||||
break;
|
||||
if (hash > bucket->last_hash)
|
||||
continue;
|
||||
}
|
||||
for (size_t e_idx = 0; e_idx < bucket->count; ++e_idx) {
|
||||
struct hash_entry *entry = &bucket->entries[e_idx];
|
||||
if (entry->hash == hash) {
|
||||
for (size_t j = 0; j < bucket->count; ++j) {
|
||||
struct hash_entry *entry = &bucket->entries[j];
|
||||
if (entry->hash == hash)
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@ -111,9 +216,6 @@ static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx)
|
||||
sizeof(struct hash_bucket));
|
||||
}
|
||||
|
||||
#define MIN(A, B) ((A) <= (B) ? (A) : (B))
|
||||
#define MAX(A, B) ((A) >= (B) ? (A) : (B))
|
||||
|
||||
static struct hash_entry *add_entry_to_bucket(
|
||||
struct hash_bucket *b, uint64_t hash)
|
||||
{
|
||||
@ -154,10 +256,8 @@ static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash)
|
||||
assert(false);
|
||||
}
|
||||
|
||||
void hashmap_set(struct hashmap *m, char const *key, void *value)
|
||||
static void hashmap_set_internal(struct hashmap *m, uint64_t hash, void *value)
|
||||
{
|
||||
uint64_t hash = hash_key(key);
|
||||
|
||||
if (m->buckets_count == 0) {
|
||||
struct hash_bucket *bucket = insert_bucket_at(m, 0);
|
||||
bucket->entries[0] = (struct hash_entry) { hash, value };
|
||||
@ -176,15 +276,77 @@ void hashmap_set(struct hashmap *m, char const *key, void *value)
|
||||
return;
|
||||
}
|
||||
|
||||
// Store `value` under the NUL-terminated `key`. Only the key's hash is passed
// on to the map; the key bytes themselves are not retained by this function.
void hashmap_set(struct hashmap *m, char const *key, void *value)
{
    hashmap_set_internal(m, hash_key(key), value);
}
|
||||
|
||||
// Store `value` under a key given as pointer + length (the key does not have
// to be NUL-terminated). Only the key's hash is passed on to the map.
void hashmap_set_sized(
    struct hashmap *m, char const *key, size_t key_size, void *value)
{
    hashmap_set_internal(m, hash_key_sized(key, key_size), value);
}
|
||||
|
||||
// True when an entry with the hash of the NUL-terminated `key` exists in *m.
bool hashmap_has(struct hashmap *m, char const *key)
{
    struct hash_entry const *found = find_entry(m, hash_key(key));
    return found != NULL;
}
|
||||
|
||||
void *hashmap_get(struct hashmap *m, char const *key)
|
||||
static void *hashmap_get_internal(struct hashmap *m, uint64_t hash)
|
||||
{
|
||||
uint64_t hash = hash_key(key);
|
||||
struct hash_entry *entry = find_entry(m, hash);
|
||||
return entry ? entry->value : NULL;
|
||||
}
|
||||
// Look up the NUL-terminated `key` by its hash and return whatever
// hashmap_get_internal() yields for that hash.
void *hashmap_get(struct hashmap *m, char const *key)
{
    return hashmap_get_internal(m, hash_key(key));
}
|
||||
// Look up a key given as pointer + length (no NUL terminator required) by its
// hash and return whatever hashmap_get_internal() yields for that hash.
void *hashmap_get_sized(struct hashmap *m, char const *key, size_t key_size)
{
    return hashmap_get_internal(m, hash_key_sized(key, key_size));
}
|
||||
|
||||
// One chunk of memory handed out piecewise by the block allocator.
struct blockalloc_block {
    unsigned char *data; // start of the chunk's storage
    size_t size;         // number of bytes available at `data`
};
|
||||
|
||||
void blockalloc_construct(struct blockalloc *a)
|
||||
{
|
||||
*a = (struct blockalloc) { NULL, 0, 0, 0 };
|
||||
}
|
||||
|
||||
void blockalloc_destroy(struct blockalloc *a)
|
||||
{
|
||||
if (a->blocks)
|
||||
free(a->blocks);
|
||||
}
|
||||
|
||||
void *blockalloc_alloc(struct blockalloc *a, size_t size, size_t align)
|
||||
{
|
||||
size_t p = ALIGN(a->p, align);
|
||||
if (!a->blocks || p + size > a->blocks[a->count - 1].size) {
|
||||
size_t block_size = MAX(blockalloc_default_block, size);
|
||||
|
||||
struct blockalloc_block block = {
|
||||
.data = malloc(block_size),
|
||||
.size = block_size,
|
||||
};
|
||||
|
||||
array_push((void **)&a->blocks,
|
||||
&a->capacity,
|
||||
&a->count,
|
||||
&block,
|
||||
sizeof(struct blockalloc_block));
|
||||
a->p = 0;
|
||||
p = 0;
|
||||
}
|
||||
void *ptr = &a->blocks[a->count - 1].data[p];
|
||||
a->p = p + size;
|
||||
return ptr;
|
||||
}
|
||||
|
||||
@ -18,6 +18,23 @@ void *array_insert_at(void **data,
|
||||
void const *elem,
|
||||
size_t elem_size);
|
||||
|
||||
// Pointer array with two overlapping representations:
//   - inline: up to three pointers stored in `smalldata`, each with its low
//     bit set to mark the slot as occupied;
//   - heap: `data` / `capacity` / `count` describe a malloc'ed buffer.
// The low bit of the first word tells the two apart.
struct smallarray {
    union {
        size_t smalldata[3]; // inline slots (pointer | 1 when occupied)
        struct {
            void **data;     // heap buffer of element pointers
            size_t capacity; // number of slots allocated in `data`
            size_t count;    // number of slots in use
        };
    };
};
|
||||
|
||||
void smallarray_construct(struct smallarray *a);
|
||||
void smallarray_destroy(struct smallarray *a);
|
||||
void smallarray_push(struct smallarray *a, void *value);
|
||||
size_t smallarray_count(struct smallarray const *a);
|
||||
void *smallarray_get(struct smallarray *a, size_t idx);
|
||||
|
||||
struct hash_entry {
|
||||
uint64_t hash;
|
||||
void *value;
|
||||
@ -41,7 +58,25 @@ struct hashmap {
|
||||
void hashmap_construct(struct hashmap *m);
|
||||
void hashmap_destroy(struct hashmap *m);
|
||||
void hashmap_set(struct hashmap *m, char const *key, void *value);
|
||||
void hashmap_set_sized(
|
||||
struct hashmap *m, char const *key, size_t key_size, void *value);
|
||||
bool hashmap_has(struct hashmap *m, char const *key);
|
||||
void *hashmap_get(struct hashmap *m, char const *key);
|
||||
void *hashmap_get_sized(struct hashmap *m, char const *key, size_t key_size);
|
||||
|
||||
#define blockalloc_default_block 4096
|
||||
|
||||
struct blockalloc_block;
|
||||
|
||||
// Allocator that hands out memory from a growing list of blocks.
struct blockalloc {
    struct blockalloc_block *blocks; // table of blocks, newest last
    size_t capacity;                 // slots allocated in `blocks`
    size_t count;                    // blocks currently in the table
    size_t p;                        // next free offset in the newest block
};
|
||||
|
||||
void blockalloc_construct(struct blockalloc *a);
|
||||
void blockalloc_destroy(struct blockalloc *a);
|
||||
void *blockalloc_alloc(struct blockalloc *a, size_t size, size_t align);
|
||||
|
||||
#endif
|
||||
|
||||
51
src/json.h
Normal file
51
src/json.h
Normal file
@ -0,0 +1,51 @@
|
||||
#ifndef JSON_H
|
||||
#define JSON_H
|
||||
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
// Kind of a JSON value. Numbering starts at 1, so 0 is not a valid type.
enum json_type {
    json_null = 1,
    json_false = 2,
    json_true = 3,
    json_int = 4,
    json_float = 5,
    json_string = 6,
    json_array = 7,
    json_object = 8,
};
|
||||
|
||||
struct json_value;
|
||||
|
||||
struct json_value *json_new(enum json_type type);
|
||||
void json_free(struct json_value *value);
|
||||
|
||||
bool json_is(struct json_value const *value, enum json_type type);
|
||||
|
||||
bool json_get_bool(struct json_value const *value);
|
||||
int64_t json_get_int(struct json_value const *value);
|
||||
double json_get_float(struct json_value const *value);
|
||||
char *json_get_string(struct json_value *value);
|
||||
|
||||
void json_set_bool(struct json_value *value, bool val);
|
||||
void json_set_int(struct json_value *value, int64_t val);
|
||||
void json_set_float(struct json_value *value, double val);
|
||||
void json_set_string(struct json_value *value, char *val);
|
||||
|
||||
struct json_value *json_idx(struct json_value *array, size_t idx);
|
||||
void json_push(struct json_value *array, struct json_value *value);
|
||||
|
||||
struct json_value *json_key(struct json_value *object, char const *key);
|
||||
struct json_value *json_key_sized(
|
||||
struct json_value *object, char const *key, size_t key_size);
|
||||
void json_set(
|
||||
struct json_value *object, char const *key, struct json_value *value);
|
||||
void json_set_sized(struct json_value *object,
|
||||
char const *key,
|
||||
size_t key_size,
|
||||
struct json_value *value);
|
||||
|
||||
struct json_value *json_parse(char const *text, size_t text_size);
|
||||
|
||||
#endif
|
||||
339
src/json_parse.c
Normal file
339
src/json_parse.c
Normal file
@ -0,0 +1,339 @@
|
||||
#include "collections.h"
|
||||
#include "json.h"
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Position inside the input text.
struct loc {
    size_t idx; // byte offset from the start of the text
    int line;   // 1-based line number
    int col;    // 1-based column number
};
|
||||
|
||||
static void report(
|
||||
struct loc loc, char const *message, char const *text, size_t text_len)
|
||||
{
|
||||
fprintf(stderr, "error: %s\n", message);
|
||||
if (!text)
|
||||
return;
|
||||
assert(text[loc.idx] != '\n');
|
||||
size_t line_begin_idx = loc.idx;
|
||||
while (line_begin_idx > 0 && text[line_begin_idx] != '\n') {
|
||||
line_begin_idx -= 1;
|
||||
}
|
||||
if (text[line_begin_idx] == '\n') {
|
||||
line_begin_idx += 1;
|
||||
}
|
||||
size_t line_end_idx = loc.idx + 1;
|
||||
while (line_end_idx < text_len && text[line_end_idx] != '\n') {
|
||||
line_end_idx += 1;
|
||||
}
|
||||
if (line_end_idx >= text_len || text[line_end_idx] == '\n') {
|
||||
line_end_idx -= 1;
|
||||
}
|
||||
int linenr_width = snprintf(NULL, 0, "%d", loc.line);
|
||||
static char const *spaces = " ";
|
||||
printf("%.*s|\n"
|
||||
"%d|%.*s\n"
|
||||
"%.*s|%.*s^\n"
|
||||
"%.*s|\n",
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.line,
|
||||
(int)(line_end_idx - line_begin_idx + 1),
|
||||
&text[line_begin_idx],
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.col - 1,
|
||||
spaces,
|
||||
linenr_width,
|
||||
spaces);
|
||||
}
|
||||
|
||||
enum tokty {
|
||||
tt_eof,
|
||||
tt_null = json_null,
|
||||
tt_false = json_false,
|
||||
tt_true = json_true,
|
||||
tt_string,
|
||||
tt_float,
|
||||
tt_int = '0',
|
||||
tt_comma = ',',
|
||||
tt_colon = ':',
|
||||
tt_lbracket = '[',
|
||||
tt_rbracket = ']',
|
||||
tt_lbrace = '{',
|
||||
tt_rbrace = '}',
|
||||
};
|
||||
|
||||
struct tok {
|
||||
enum tokty ty;
|
||||
char const *ptr;
|
||||
size_t len;
|
||||
struct loc loc;
|
||||
};
|
||||
|
||||
// Cursor over the input text.
struct tokenizer {
    char const *text; // input being scanned
    size_t len;       // number of bytes in `text`
    size_t idx;       // offset of the next unread byte
    int line;         // line of the next unread byte
    int col;          // column of the next unread byte
    bool failed;      // set once any lexical error has been reported
};
|
||||
|
||||
static void t_step(struct tokenizer *t)
|
||||
{
|
||||
if (t->idx >= t->len)
|
||||
return;
|
||||
if (t->text[t->idx] == '\n') {
|
||||
t->line += 1;
|
||||
t->col = 1;
|
||||
} else {
|
||||
t->col += 1;
|
||||
}
|
||||
t->idx += 1;
|
||||
}
|
||||
|
||||
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
|
||||
{
|
||||
return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
|
||||
}
|
||||
|
||||
static struct tok tokenizer_next(struct tokenizer *t);
|
||||
|
||||
static struct tok t_make_ident_tok(
|
||||
struct tokenizer *t, struct loc loc, size_t *i)
|
||||
{
|
||||
char const *kws[] = { "null", "false", "true" };
|
||||
enum tokty tys[] = { tt_null, tt_false, tt_true };
|
||||
for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
|
||||
if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) {
|
||||
return t_tok(t, tys[kw_i], loc);
|
||||
}
|
||||
}
|
||||
report(loc, "invalid identifier", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
|
||||
static struct tok t_make_number_tok(
|
||||
struct tokenizer *t, struct loc loc, size_t *i)
|
||||
{
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
t_step(t);
|
||||
}
|
||||
enum tokty ty = tt_int;
|
||||
if (*i < t->len && t->text[*i] == '.') {
|
||||
ty = tt_float;
|
||||
t_step(t);
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
t_step(t);
|
||||
}
|
||||
}
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
|
||||
static struct tok t_make_string_tok(
|
||||
struct tokenizer *t, struct loc loc, size_t *i)
|
||||
{
|
||||
t_step(t);
|
||||
while (*i < t->len && t->text[*i] != '\"') {
|
||||
if (t->text[*i] == '\\') {
|
||||
t_step(t);
|
||||
if (*i >= t->len)
|
||||
break;
|
||||
}
|
||||
t_step(t);
|
||||
}
|
||||
if (*i >= t->len && t->text[*i] != '\"') {
|
||||
report(loc, "malformed string", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
t_step(t);
|
||||
return t_tok(t, tt_string, loc);
|
||||
}
|
||||
|
||||
static struct tok tokenizer_next(struct tokenizer *t)
|
||||
{
|
||||
struct loc loc = { t->idx, t->line, t->col };
|
||||
size_t *i = &t->idx;
|
||||
if (*i >= t->len) {
|
||||
return t_tok(t, tt_eof, loc);
|
||||
}
|
||||
bool matched = false;
|
||||
while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
|
||||
matched = true;
|
||||
t_step(t);
|
||||
}
|
||||
if (matched) {
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
if (strchr(",:[]{}0", t->text[*i]) != NULL) {
|
||||
enum tokty ty = (enum tokty)t->text[*i];
|
||||
t_step(t);
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
|
||||
matched = true;
|
||||
t_step(t);
|
||||
}
|
||||
if (matched) {
|
||||
return t_make_ident_tok(t, loc, i);
|
||||
}
|
||||
if (t->text[*i] >= '1' && t->text[*i] <= '9') {
|
||||
return t_make_number_tok(t, loc, i);
|
||||
}
|
||||
if (t->text[*i] == '\"') {
|
||||
return t_make_string_tok(t, loc, i);
|
||||
}
|
||||
report(loc, "illegal character", t->text, t->len);
|
||||
t->failed = true;
|
||||
t_step(t);
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
|
||||
struct parser {
|
||||
struct tokenizer tokenizer;
|
||||
struct tok tok;
|
||||
struct blockalloc allocator;
|
||||
};
|
||||
|
||||
static void p_step(struct parser *p)
|
||||
{
|
||||
p->tok = tokenizer_next(&p->tokenizer);
|
||||
}
|
||||
|
||||
static void parser_construct(
|
||||
struct parser *p, char const *text, size_t text_len)
|
||||
{
|
||||
*p = (struct parser) {
|
||||
.tokenizer = (struct tokenizer) { text, text_len, 0, 1, 1, false },
|
||||
.tok = (struct tok) { 0 },
|
||||
.allocator = (struct blockalloc) { 0 },
|
||||
};
|
||||
blockalloc_construct(&p->allocator);
|
||||
p_step(p);
|
||||
}
|
||||
|
||||
static void p_report(struct parser *p, struct loc loc, char const *message)
|
||||
{
|
||||
report(loc, message, p->tokenizer.text, p->tokenizer.len);
|
||||
}
|
||||
|
||||
static struct json_value *parser_parse(struct parser *p);
|
||||
|
||||
static struct json_value *parser_parse_array(struct parser *p, enum tokty *ty)
|
||||
{
|
||||
p_step(p);
|
||||
struct json_value *val = json_new(json_array);
|
||||
bool tail = false;
|
||||
while (*ty != tt_eof && ((!tail && *ty != ']') || (tail && *ty == ','))) {
|
||||
if (tail)
|
||||
p_step(p);
|
||||
struct json_value *child = parser_parse(p);
|
||||
if (!child)
|
||||
goto array_leave_error_free_val;
|
||||
json_push(val, child);
|
||||
tail = true;
|
||||
}
|
||||
if (*ty == tt_eof || *ty != ']') {
|
||||
p_report(p, p->tok.loc, "expected ']'");
|
||||
goto array_leave_error_free_val;
|
||||
}
|
||||
p_step(p);
|
||||
return val;
|
||||
array_leave_error_free_val:
|
||||
json_free(val);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct json_value *parser_parse_object(struct parser *p, enum tokty *ty)
|
||||
{
|
||||
p_step(p);
|
||||
struct json_value *val = json_new(json_object);
|
||||
bool tail = false;
|
||||
while (*ty != tt_eof && ((!tail && *ty != '}') || (tail && *ty == ','))) {
|
||||
if (tail)
|
||||
p_step(p);
|
||||
if (*ty != tt_string) {
|
||||
p_report(p, p->tok.loc, "expected string");
|
||||
goto object_leave_error_free_val;
|
||||
}
|
||||
struct tok key_tok = p->tok;
|
||||
p_step(p);
|
||||
if (*ty != ':') {
|
||||
p_report(p, p->tok.loc, "expected ':'");
|
||||
goto object_leave_error_free_val;
|
||||
}
|
||||
p_step(p);
|
||||
struct json_value *child = parser_parse(p);
|
||||
if (!child)
|
||||
goto object_leave_error_free_val;
|
||||
json_set_sized(val, key_tok.ptr + 1, key_tok.len - 2, child);
|
||||
tail = true;
|
||||
}
|
||||
if (*ty == tt_eof || *ty != '}') {
|
||||
p_report(p, p->tok.loc, "expected '}'");
|
||||
goto object_leave_error_free_val;
|
||||
}
|
||||
p_step(p);
|
||||
return val;
|
||||
object_leave_error_free_val:
|
||||
json_free(val);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct json_value *parser_parse(struct parser *p)
|
||||
{
|
||||
struct loc loc = p->tok.loc;
|
||||
enum tokty *ty = &p->tok.ty;
|
||||
|
||||
if (*ty == tt_null || *ty == tt_false || *ty == tt_true) {
|
||||
struct json_value *val = json_new((enum json_type) * ty);
|
||||
p_step(p);
|
||||
return val;
|
||||
} else if (*ty == tt_int) {
|
||||
int64_t value = strtol(p->tok.ptr, NULL, 10);
|
||||
struct json_value *val = json_new(json_int);
|
||||
json_set_int(val, value);
|
||||
p_step(p);
|
||||
return val;
|
||||
} else if (*ty == tt_float) {
|
||||
double value = strtod(p->tok.ptr, NULL);
|
||||
struct json_value *val = json_new(json_float);
|
||||
json_set_float(val, value);
|
||||
p_step(p);
|
||||
return val;
|
||||
} else if (*ty == tt_string) {
|
||||
char *value = blockalloc_alloc(&p->allocator, p->tok.len - 2 + 1, 2);
|
||||
strncpy(value, p->tok.ptr + 1, p->tok.len - 2);
|
||||
value[p->tok.len - 2] = '\0';
|
||||
struct json_value *val = json_new(json_string);
|
||||
json_set_string(val, value);
|
||||
p_step(p);
|
||||
return val;
|
||||
} else if (*ty == '[') {
|
||||
return parser_parse_array(p, ty);
|
||||
} else if (*ty == '{') {
|
||||
return parser_parse_object(p, ty);
|
||||
} else {
|
||||
p_report(p, loc, "expected expression");
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
struct json_value *json_parse(char const *text, size_t text_size)
|
||||
{
|
||||
text_size = text_size > 0 ? text_size : strlen(text);
|
||||
|
||||
struct parser p;
|
||||
parser_construct(&p, text, text_size);
|
||||
|
||||
return parser_parse(&p);
|
||||
}
|
||||
153
src/json_value.c
Normal file
153
src/json_value.c
Normal file
@ -0,0 +1,153 @@
|
||||
#include "collections.h"
|
||||
#include "json.h"
|
||||
#include <assert.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
struct json_value {
|
||||
enum json_type ty;
|
||||
union {
|
||||
int64_t int_val;
|
||||
double float_val;
|
||||
char *string_val;
|
||||
struct smallarray array_vals;
|
||||
struct hashmap object_fields;
|
||||
};
|
||||
};
|
||||
|
||||
struct json_value *json_new(enum json_type ty)
|
||||
{
|
||||
struct json_value *v = malloc(sizeof(struct json_value));
|
||||
*v = (struct json_value) { .ty = ty };
|
||||
switch (ty) {
|
||||
case json_null:
|
||||
case json_false:
|
||||
case json_true:
|
||||
case json_int:
|
||||
case json_float:
|
||||
case json_string:
|
||||
break;
|
||||
case json_array:
|
||||
smallarray_construct(&v->array_vals);
|
||||
break;
|
||||
case json_object:
|
||||
hashmap_construct(&v->object_fields);
|
||||
break;
|
||||
}
|
||||
return v;
|
||||
}
|
||||
|
||||
void json_free(struct json_value *v)
|
||||
{
|
||||
switch (v->ty) {
|
||||
case json_null:
|
||||
case json_false:
|
||||
case json_true:
|
||||
case json_int:
|
||||
case json_float:
|
||||
break;
|
||||
case json_string:
|
||||
if (v->string_val)
|
||||
free(v->string_val);
|
||||
break;
|
||||
case json_array:
|
||||
smallarray_destroy(&v->array_vals);
|
||||
break;
|
||||
case json_object:
|
||||
hashmap_destroy(&v->object_fields);
|
||||
break;
|
||||
}
|
||||
free(v);
|
||||
}
|
||||
|
||||
bool json_is(struct json_value const *value, enum json_type type)
|
||||
{
|
||||
return value->ty == type;
|
||||
}
|
||||
|
||||
bool json_get_bool(struct json_value const *value)
|
||||
{
|
||||
assert(value->ty == json_true || value->ty == json_false);
|
||||
return value->ty == json_true;
|
||||
}
|
||||
int64_t json_get_int(struct json_value const *value)
|
||||
{
|
||||
assert(value->ty == json_int);
|
||||
return value->int_val;
|
||||
}
|
||||
double json_get_float(struct json_value const *value)
|
||||
{
|
||||
assert(value->ty == json_float);
|
||||
return value->float_val;
|
||||
}
|
||||
char *json_get_string(struct json_value *value)
|
||||
{
|
||||
assert(value->ty == json_string);
|
||||
return value->string_val;
|
||||
}
|
||||
|
||||
void json_set_bool(struct json_value *value, bool val)
|
||||
{
|
||||
assert(value->ty == json_true || value->ty == json_false);
|
||||
value->ty = val ? json_true : json_false;
|
||||
}
|
||||
void json_set_int(struct json_value *value, int64_t val)
|
||||
{
|
||||
assert(value->ty == json_int);
|
||||
value->int_val = val;
|
||||
}
|
||||
void json_set_float(struct json_value *value, double val)
|
||||
{
|
||||
assert(value->ty == json_float);
|
||||
value->float_val = val;
|
||||
}
|
||||
void json_set_string(struct json_value *value, char *val)
|
||||
{
|
||||
assert(value->ty == json_string);
|
||||
value->string_val = val;
|
||||
}
|
||||
|
||||
struct json_value *json_idx(struct json_value *array, size_t idx)
|
||||
{
|
||||
assert(array->ty == json_array);
|
||||
return smallarray_get(&array->array_vals, idx);
|
||||
}
|
||||
|
||||
void json_push(struct json_value *array, struct json_value *value)
|
||||
{
|
||||
assert(array->ty == json_array);
|
||||
smallarray_push(&array->array_vals, value);
|
||||
}
|
||||
|
||||
struct json_value *json_key(struct json_value *object, char const *key)
|
||||
{
|
||||
assert(object->ty == json_object);
|
||||
return hashmap_get(&object->object_fields, key);
|
||||
}
|
||||
|
||||
struct json_value *json_key_sized(
|
||||
struct json_value *object, char const *key, size_t key_size)
|
||||
{
|
||||
assert(object->ty == json_object);
|
||||
return hashmap_get_sized(&object->object_fields, key, key_size);
|
||||
}
|
||||
|
||||
void json_set(
|
||||
struct json_value *object, char const *key, struct json_value *value)
|
||||
{
|
||||
assert(object->ty == json_object);
|
||||
hashmap_set(&object->object_fields, key, value);
|
||||
}
|
||||
|
||||
void json_set_sized(struct json_value *object,
|
||||
char const *key,
|
||||
size_t key_size,
|
||||
struct json_value *value)
|
||||
{
|
||||
assert(object->ty == json_object);
|
||||
hashmap_set_sized(&object->object_fields, key, key_size, value);
|
||||
}
|
||||
260
src/main.c
260
src/main.c
@ -1,4 +1,5 @@
|
||||
#include "collections.h"
|
||||
#include "json.h"
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
@ -8,197 +9,6 @@
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
struct loc {
|
||||
size_t idx;
|
||||
int line;
|
||||
int col;
|
||||
};
|
||||
|
||||
void report(
|
||||
struct loc loc, char const *message, char const *text, size_t text_len)
|
||||
{
|
||||
fprintf(stderr, "error: %s\n", message);
|
||||
if (!text)
|
||||
return;
|
||||
assert(text[loc.idx] != '\n');
|
||||
size_t line_begin_idx = loc.idx;
|
||||
while (line_begin_idx > 0 && text[line_begin_idx] != '\n') {
|
||||
line_begin_idx -= 1;
|
||||
}
|
||||
if (text[line_begin_idx] == '\n') {
|
||||
line_begin_idx += 1;
|
||||
}
|
||||
size_t line_end_idx = loc.idx + 1;
|
||||
while (line_end_idx < text_len && text[line_end_idx] != '\n') {
|
||||
line_end_idx += 1;
|
||||
}
|
||||
if (line_end_idx >= text_len || text[line_end_idx] == '\n') {
|
||||
line_end_idx -= 1;
|
||||
}
|
||||
int linenr_width = snprintf(NULL, 0, "%d", loc.line);
|
||||
static char const *spaces = " ";
|
||||
printf("%.*s|\n"
|
||||
"%d|%.*s\n"
|
||||
"%.*s|%.*s^\n"
|
||||
"%.*s|\n",
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.line,
|
||||
(int)(line_end_idx - line_begin_idx + 1),
|
||||
&text[line_begin_idx],
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.col - 1,
|
||||
spaces,
|
||||
linenr_width,
|
||||
spaces);
|
||||
}
|
||||
|
||||
enum tokty {
|
||||
tt_eof,
|
||||
tt_null,
|
||||
tt_false,
|
||||
tt_true,
|
||||
tt_string,
|
||||
tt_float,
|
||||
tt_int = '0',
|
||||
tt_comma = ',',
|
||||
tt_colon = ':',
|
||||
tt_lbracket = '[',
|
||||
tt_rbracket = ']',
|
||||
tt_lbrace = '{',
|
||||
tt_rbrace = '}',
|
||||
};
|
||||
|
||||
struct tok {
|
||||
enum tokty ty;
|
||||
char const *ptr;
|
||||
size_t len;
|
||||
struct loc loc;
|
||||
};
|
||||
|
||||
struct tokenizer {
|
||||
char const *text;
|
||||
size_t len;
|
||||
size_t idx;
|
||||
int line;
|
||||
int col;
|
||||
bool failed;
|
||||
};
|
||||
|
||||
static void t_step(struct tokenizer *t)
|
||||
{
|
||||
if (t->idx >= t->len)
|
||||
return;
|
||||
if (t->text[t->idx] == '\n') {
|
||||
t->line += 1;
|
||||
t->col = 1;
|
||||
} else {
|
||||
t->col += 1;
|
||||
}
|
||||
t->idx += 1;
|
||||
}
|
||||
|
||||
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
|
||||
{
|
||||
return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
|
||||
}
|
||||
|
||||
struct tok tokenizer_next(struct tokenizer *t)
|
||||
{
|
||||
struct loc loc = { t->idx, t->line, t->col };
|
||||
size_t *i = &t->idx;
|
||||
if (*i >= t->len) {
|
||||
return t_tok(t, tt_eof, loc);
|
||||
}
|
||||
bool matched = false;
|
||||
while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
|
||||
matched = true;
|
||||
*i += 1;
|
||||
}
|
||||
if (matched) {
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
if (strchr(",:[]{}0", t->text[*i]) != NULL) {
|
||||
enum tokty ty = (enum tokty)t->text[*i];
|
||||
*i += 1;
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
|
||||
matched = true;
|
||||
*i += 1;
|
||||
}
|
||||
if (matched) {
|
||||
char const *kws[] = { "null", "false", "true" };
|
||||
enum tokty tys[] = { tt_null, tt_false, tt_true };
|
||||
for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
|
||||
if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) {
|
||||
return t_tok(t, tys[kw_i], loc);
|
||||
}
|
||||
}
|
||||
report(loc, "invalid identifier", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
if (t->text[*i] >= '1' && t->text[*i] <= '9') {
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
*i += 1;
|
||||
}
|
||||
enum tokty ty = tt_int;
|
||||
if (*i < t->len && t->text[*i] == '.') {
|
||||
ty = tt_float;
|
||||
*i += 1;
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
*i += 1;
|
||||
}
|
||||
}
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
if (t->text[*i] == '\"') {
|
||||
i += 1;
|
||||
while (*i < t->len && t->text[*i] != '\"') {
|
||||
if (t->text[*i] != '\\') {
|
||||
*i += 1;
|
||||
if (*i >= t->len)
|
||||
break;
|
||||
}
|
||||
*i += 1;
|
||||
}
|
||||
if (*i >= t->len && t->text[*i] != '\"') {
|
||||
report(loc, "malformed string", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
*i += 1;
|
||||
return t_tok(t, tt_string, loc);
|
||||
}
|
||||
report(loc, "illegal character", t->text, t->len);
|
||||
t->failed = true;
|
||||
*i += 1;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
|
||||
enum valty {
|
||||
vt_null,
|
||||
vt_number,
|
||||
vt_string,
|
||||
vt_array,
|
||||
vt_object,
|
||||
};
|
||||
|
||||
struct val {
|
||||
enum valty ty;
|
||||
union {
|
||||
int64_t int_val;
|
||||
double float_val;
|
||||
char *string_val;
|
||||
struct {
|
||||
struct val *array_data;
|
||||
size_t *array_count;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
@ -238,68 +48,16 @@ int main(int argc, char *argv[])
|
||||
}
|
||||
fclose(file);
|
||||
|
||||
struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false };
|
||||
|
||||
struct json_value *val = json_parse(text, file_size);
|
||||
free(text);
|
||||
|
||||
char const *keys[] = {
|
||||
"first",
|
||||
"second",
|
||||
"third",
|
||||
"fourth",
|
||||
"fifth",
|
||||
"sixth",
|
||||
"seventh",
|
||||
"eigth",
|
||||
"ninth",
|
||||
"tenth",
|
||||
"first",
|
||||
"seventh",
|
||||
"tenth",
|
||||
};
|
||||
char const *values[] = {
|
||||
"salery",
|
||||
"policy",
|
||||
"strike",
|
||||
"prophecy",
|
||||
"break",
|
||||
"down",
|
||||
"think",
|
||||
"about",
|
||||
"honey",
|
||||
"and",
|
||||
"the",
|
||||
"sweet",
|
||||
"sounds",
|
||||
};
|
||||
typedef struct json_value V;
|
||||
|
||||
struct hashmap map;
|
||||
hashmap_construct(&map);
|
||||
V *a = json_key(val, "foo");
|
||||
V *b = json_idx(a, 0);
|
||||
V *c = json_idx(a, 1);
|
||||
printf(".foo[0] = %ld\n", json_get_int(b));
|
||||
printf(".foo[1] = %s\n", json_get_string(c));
|
||||
|
||||
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||
hashmap_set(&map, keys[i], (void *)values[i]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||
assert(hashmap_has(&map, keys[i]));
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||
switch (i) {
|
||||
case 0:
|
||||
assert(hashmap_get(&map, keys[i]) == values[10]);
|
||||
break;
|
||||
case 6:
|
||||
assert(hashmap_get(&map, keys[i]) == values[11]);
|
||||
break;
|
||||
case 9:
|
||||
assert(hashmap_get(&map, keys[i]) == values[12]);
|
||||
break;
|
||||
default:
|
||||
assert(hashmap_get(&map, keys[i]) == values[i]);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
hashmap_destroy(&map);
|
||||
json_free(val);
|
||||
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user