diff --git a/Makefile b/Makefile index 995b184..abd6afb 100644 --- a/Makefile +++ b/Makefile @@ -11,6 +11,8 @@ obj_dir = $(build_dir)/obj sources = \ src/main.c \ + src/json_parse.c \ + src/json_value.c \ src/collections.c target=$(build_dir)/jq diff --git a/data.json b/data.json index 81a2d3b..a4c1715 100644 --- a/data.json +++ b/data.json @@ -1,4 +1,5 @@ { - "foo": "bar" + "baz": false, + "foo": [123, "bar"] } diff --git a/src/collections.c b/src/collections.c index 31dc1b5..08afe88 100644 --- a/src/collections.c +++ b/src/collections.c @@ -6,6 +6,13 @@ #include #include +#define MIN(A, B) ((A) <= (B) ? (A) : (B)) + +#define MAX(A, B) ((A) >= (B) ? (A) : (B)) + +#define ALIGN(VAL, ALIGN) \ + ((VAL) % (ALIGN) != 0 ? (VAL) + ((ALIGN) - (VAL) % (ALIGN)) : (VAL)) + void *array_push(void **data, size_t *capacity, size_t *count, @@ -51,6 +58,90 @@ void *array_insert_at(void **data, return src_ptr; } +void smallarray_construct(struct smallarray *a) +{ + *a = (struct smallarray) { 0 }; +} + +void smallarray_destroy(struct smallarray *a) +{ + if ((a->smalldata[0] & 1) == 0 && a->data) { + free(a->data); + } +} + +static bool sa_is_big(struct smallarray const *a) +{ + return a->data && (a->smalldata[0] & 1) == 0; +} + +static void sa_push_big(struct smallarray *a, void *value) +{ + size_t capacity = 8; + void **ptr = malloc(capacity * sizeof(void *)); + size_t count = 0; + + for (size_t i = 0; i < 3; ++i) { + if ((a->smalldata[i] & 1) == 0) + break; + ptr[i] = (void *)(a->smalldata[i] & ~1ull); + count += 1; + } + + a->capacity = capacity; + a->data = ptr; + a->count = count; + array_push( + (void **)&a->data, &a->capacity, &a->count, &value, sizeof(void *)); +} + +static void sa_push_small(struct smallarray *a, void *value) +{ + for (size_t i = 0; i < 3; ++i) { + if (a->smalldata[i] & 1) + continue; + a->smalldata[i] = (size_t)value | 1; + return; + } +} + +void smallarray_push(struct smallarray *a, void *value) +{ + assert((size_t)value % 2 == 0 && "pointer must be 2 bytes aligned"); + + if (sa_is_big(a)) { + sa_push_big(a, value); + } else { + sa_push_small(a, value); + } +} + +size_t smallarray_count(struct smallarray const *a) +{ + if (sa_is_big(a)) { + return a->count; + } else { + size_t count = 0; + for (size_t i = 0; i < 3; ++i) { + if ((a->smalldata[i] & 1) == 0) + break; + count += 1; + } + return count; + } +} + +void *smallarray_get(struct smallarray *a, size_t idx) +{ + if (sa_is_big(a)) { + return a->data[idx]; + } else { + if (idx >= 3 || (a->smalldata[idx] & 1) == 0) + return NULL; + return (void *)(a->smalldata[idx] & ~1ull); + } +} + static uint64_t hash_key(char const *data) { // djb2 @@ -65,6 +156,20 @@ static uint64_t hash_key(char const *data) return hash; } +static uint64_t hash_key_sized(char const *data, size_t size) +{ + // djb2 + + uint64_t hash = 5381; + unsigned char c; + + while (size-- > 0 && (c = (unsigned char)*data++)) { + hash = ((hash << 5) + hash) + c; // hash * 33 + c + } + + return hash; +} + void hashmap_construct(struct hashmap *t) { *t = (struct hashmap) { NULL, 0, 0 }; @@ -79,16 +184,16 @@ void hashmap_destroy(struct hashmap *t) static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash) { - for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) { - struct hash_bucket *bucket = &m->buckets[b_idx]; - if (hash < bucket->first_hash || hash > bucket->last_hash) { + for (size_t i = 0; i < m->buckets_count; ++i) { + struct hash_bucket *bucket = &m->buckets[i]; + if (hash < bucket->first_hash) + break; + if (hash > bucket->last_hash) continue; - } - for (size_t e_idx = 0; e_idx < bucket->count; ++e_idx) { - struct hash_entry *entry = &bucket->entries[e_idx]; - if (entry->hash == hash) { + for (size_t j = 0; j < bucket->count; ++j) { + struct hash_entry *entry = &bucket->entries[j]; + if (entry->hash == hash) return entry; - } } } return NULL; @@ -111,9 +216,6 @@ static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx) sizeof(struct hash_bucket)); } -#define MIN(A, B) ((A) <= (B) ? (A) : (B)) -#define MAX(A, B) ((A) >= (B) ? (A) : (B)) - static struct hash_entry *add_entry_to_bucket( struct hash_bucket *b, uint64_t hash) { @@ -154,10 +256,8 @@ static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash) assert(false); } -void hashmap_set(struct hashmap *m, char const *key, void *value) +static void hashmap_set_internal(struct hashmap *m, uint64_t hash, void *value) { - uint64_t hash = hash_key(key); - if (m->buckets_count == 0) { struct hash_bucket *bucket = insert_bucket_at(m, 0); bucket->entries[0] = (struct hash_entry) { hash, value }; @@ -176,15 +276,77 @@ void hashmap_set(struct hashmap *m, char const *key, void *value) return; } +void hashmap_set(struct hashmap *m, char const *key, void *value) +{ + uint64_t hash = hash_key(key); + hashmap_set_internal(m, hash, value); +} + +void hashmap_set_sized( + struct hashmap *m, char const *key, size_t key_size, void *value) +{ + uint64_t hash = hash_key_sized(key, key_size); + hashmap_set_internal(m, hash, value); +} + bool hashmap_has(struct hashmap *m, char const *key) { uint64_t hash = hash_key(key); return find_entry(m, hash) != NULL; } -void *hashmap_get(struct hashmap *m, char const *key) +static void *hashmap_get_internal(struct hashmap *m, uint64_t hash) { - uint64_t hash = hash_key(key); struct hash_entry *entry = find_entry(m, hash); return entry ? entry->value : NULL; } +void *hashmap_get(struct hashmap *m, char const *key) +{ + uint64_t hash = hash_key(key); + return hashmap_get_internal(m, hash); +} +void *hashmap_get_sized(struct hashmap *m, char const *key, size_t key_size) +{ + uint64_t hash = hash_key_sized(key, key_size); + return hashmap_get_internal(m, hash); +} + +struct blockalloc_block { + unsigned char *data; + size_t size; +}; + +void blockalloc_construct(struct blockalloc *a) +{ + *a = (struct blockalloc) { NULL, 0, 0, 0 }; +} + +void blockalloc_destroy(struct blockalloc *a) +{ + if (a->blocks) + free(a->blocks); +} + +void *blockalloc_alloc(struct blockalloc *a, size_t size, size_t align) +{ + size_t p = ALIGN(a->p, align); + if (!a->blocks || p + size > a->blocks[a->count - 1].size) { + size_t block_size = MAX(blockalloc_default_block, size); + + struct blockalloc_block block = { + .data = malloc(block_size), + .size = block_size, + }; + + array_push((void **)&a->blocks, + &a->capacity, + &a->count, + &block, + sizeof(struct blockalloc_block)); + a->p = 0; + p = 0; + } + void *ptr = &a->blocks[a->count - 1].data[p]; + a->p = p + size; + return ptr; +} diff --git a/src/collections.h b/src/collections.h index 77d9690..87b92bb 100644 --- a/src/collections.h +++ b/src/collections.h @@ -18,6 +18,23 @@ void *array_insert_at(void **data, void const *elem, size_t elem_size); +struct smallarray { + union { + size_t smalldata[3]; + struct { + void **data; + size_t capacity; + size_t count; + }; + }; +}; + +void smallarray_construct(struct smallarray *a); +void smallarray_destroy(struct smallarray *a); +void smallarray_push(struct smallarray *a, void *value); +size_t smallarray_count(struct smallarray const *a); +void *smallarray_get(struct smallarray *a, size_t idx); + struct hash_entry { uint64_t hash; void *value; @@ -41,7 +58,25 @@ struct hashmap { void hashmap_construct(struct hashmap *m); void hashmap_destroy(struct hashmap *m); void hashmap_set(struct hashmap *m, char const *key, void *value); +void hashmap_set_sized( + struct hashmap *m, char const *key, size_t key_size, void *value); bool hashmap_has(struct hashmap *m, char const *key); void *hashmap_get(struct hashmap *m, char const *key); +void *hashmap_get_sized(struct hashmap *m, char const *key, size_t key_size); + +#define blockalloc_default_block 4096 + +struct blockalloc_block; + +struct blockalloc { + struct blockalloc_block *blocks; + size_t capacity; + size_t count; + size_t p; +}; + +void blockalloc_construct(struct blockalloc *a); +void blockalloc_destroy(struct blockalloc *a); +void *blockalloc_alloc(struct blockalloc *a, size_t size, size_t align); #endif diff --git a/src/json.h b/src/json.h new file mode 100644 index 0000000..7515b13 --- /dev/null +++ b/src/json.h @@ -0,0 +1,51 @@ +#ifndef JSON_H +#define JSON_H + +#include +#include +#include + +enum json_type { + json_null = 1, + json_false = 2, + json_true = 3, + json_int, + json_float, + json_string, + json_array, + json_object, +}; + +struct json_value; + +struct json_value *json_new(enum json_type type); +void json_free(struct json_value *value); + +bool json_is(struct json_value const *value, enum json_type type); + +bool json_get_bool(struct json_value const *value); +int64_t json_get_int(struct json_value const *value); +double json_get_float(struct json_value const *value); +char *json_get_string(struct json_value *value); + +void json_set_bool(struct json_value *value, bool val); +void json_set_int(struct json_value *value, int64_t val); +void json_set_float(struct json_value *value, double val); +void json_set_string(struct json_value *value, char *val); + +struct json_value *json_idx(struct json_value *array, size_t idx); +void json_push(struct json_value *array, struct json_value *value); + +struct json_value *json_key(struct json_value *object, char const *key); +struct json_value *json_key_sized( + struct json_value *object, char const *key, size_t key_size); +void json_set( + struct json_value *object, char const *key, struct json_value *value); +void json_set_sized(struct json_value *object, + char const *key, + size_t key_size, + struct json_value *value); + +struct json_value *json_parse(char const *text, size_t text_size); + +#endif diff --git a/src/json_parse.c b/src/json_parse.c new file mode 100644 index 0000000..a59fa85 --- /dev/null +++ b/src/json_parse.c @@ -0,0 +1,339 @@ +#include "collections.h" +#include "json.h" +#include +#include +#include +#include +#include +#include +#include + +struct loc { + size_t idx; + int line; + int col; +}; + +static void report( + struct loc loc, char const *message, char const *text, size_t text_len) +{ + fprintf(stderr, "error: %s\n", message); + if (!text) + return; + assert(text[loc.idx] != '\n'); + size_t line_begin_idx = loc.idx; + while (line_begin_idx > 0 && text[line_begin_idx] != '\n') { + line_begin_idx -= 1; + } + if (text[line_begin_idx] == '\n') { + line_begin_idx += 1; + } + size_t line_end_idx = loc.idx + 1; + while (line_end_idx < text_len && text[line_end_idx] != '\n') { + line_end_idx += 1; + } + if (line_end_idx >= text_len || text[line_end_idx] == '\n') { + line_end_idx -= 1; + } + int linenr_width = snprintf(NULL, 0, "%d", loc.line); + static char const *spaces = " "; + printf("%.*s|\n" + "%d|%.*s\n" + "%.*s|%.*s^\n" + "%.*s|\n", + linenr_width, + spaces, + loc.line, + (int)(line_end_idx - line_begin_idx + 1), + &text[line_begin_idx], + linenr_width, + spaces, + loc.col - 1, + spaces, + linenr_width, + spaces); +} + +enum tokty { + tt_eof, + tt_null = json_null, + tt_false = json_false, + tt_true = json_true, + tt_string, + tt_float, + tt_int = '0', + tt_comma = ',', + tt_colon = ':', + tt_lbracket = '[', + tt_rbracket = ']', + tt_lbrace = '{', + tt_rbrace = '}', +}; + +struct tok { + enum tokty ty; + char const *ptr; + size_t len; + struct loc loc; +}; + +struct tokenizer { + char const *text; + size_t len; + size_t idx; + int line; + int col; + bool failed; +}; + +static void t_step(struct tokenizer *t) +{ + if (t->idx >= t->len) + return; + if (t->text[t->idx] == '\n') { + t->line += 1; + t->col = 1; + } else { + t->col += 1; + } + t->idx += 1; +} + +static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc) +{ + return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc }; +} + +static struct tok tokenizer_next(struct tokenizer *t); + +static struct tok t_make_ident_tok( + struct tokenizer *t, struct loc loc, size_t *i) +{ + char const *kws[] = { "null", "false", "true" }; + enum tokty tys[] = { tt_null, tt_false, tt_true }; + for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) { + if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) { + return t_tok(t, tys[kw_i], loc); + } + } + report(loc, "invalid identifier", t->text, t->len); + t->failed = true; + return tokenizer_next(t); +} + +static struct tok t_make_number_tok( + struct tokenizer *t, struct loc loc, size_t *i) +{ + while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') { + t_step(t); + } + enum tokty ty = tt_int; + if (*i < t->len && t->text[*i] == '.') { + ty = tt_float; + t_step(t); + while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') { + t_step(t); + } + } + return t_tok(t, ty, loc); +} + +static struct tok t_make_string_tok( + struct tokenizer *t, struct loc loc, size_t *i) +{ + t_step(t); + while (*i < t->len && t->text[*i] != '\"') { + if (t->text[*i] == '\\') { + t_step(t); + if (*i >= t->len) + break; + } + t_step(t); + } + if (*i >= t->len && t->text[*i] != '\"') { + report(loc, "malformed string", t->text, t->len); + t->failed = true; + return tokenizer_next(t); + } + t_step(t); + return t_tok(t, tt_string, loc); +} + +static struct tok tokenizer_next(struct tokenizer *t) +{ + struct loc loc = { t->idx, t->line, t->col }; + size_t *i = &t->idx; + if (*i >= t->len) { + return t_tok(t, tt_eof, loc); + } + bool matched = false; + while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) { + matched = true; + t_step(t); + } + if (matched) { + return tokenizer_next(t); + } + if (strchr(",:[]{}0", t->text[*i]) != NULL) { + enum tokty ty = (enum tokty)t->text[*i]; + t_step(t); + return t_tok(t, ty, loc); + } + while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') { + matched = true; + t_step(t); + } + if (matched) { + return t_make_ident_tok(t, loc, i); + } + if (t->text[*i] >= '1' && t->text[*i] <= '9') { + return t_make_number_tok(t, loc, i); + } + if (t->text[*i] == '\"') { + return t_make_string_tok(t, loc, i); + } + report(loc, "illegal character", t->text, t->len); + t->failed = true; + t_step(t); + return tokenizer_next(t); +} + +struct parser { + struct tokenizer tokenizer; + struct tok tok; + struct blockalloc allocator; +}; + +static void p_step(struct parser *p) +{ + p->tok = tokenizer_next(&p->tokenizer); +} + +static void parser_construct( + struct parser *p, char const *text, size_t text_len) +{ + *p = (struct parser) { + .tokenizer = (struct tokenizer) { text, text_len, 0, 1, 1, false }, + .tok = (struct tok) { 0 }, + .allocator = (struct blockalloc) { 0 }, + }; + blockalloc_construct(&p->allocator); + p_step(p); +} + +static void p_report(struct parser *p, struct loc loc, char const *message) +{ + report(loc, message, p->tokenizer.text, p->tokenizer.len); +} + +static struct json_value *parser_parse(struct parser *p); + +static struct json_value *parser_parse_array(struct parser *p, enum tokty *ty) +{ + p_step(p); + struct json_value *val = json_new(json_array); + bool tail = false; + while (*ty != tt_eof && ((!tail && *ty != ']') || (tail && *ty == ','))) { + if (tail) + p_step(p); + struct json_value *child = parser_parse(p); + if (!child) + goto array_leave_error_free_val; + json_push(val, child); + tail = true; + } + if (*ty == tt_eof || *ty != ']') { + p_report(p, p->tok.loc, "expected ']'"); + goto array_leave_error_free_val; + } + p_step(p); + return val; +array_leave_error_free_val: + json_free(val); + return NULL; +} + +static struct json_value *parser_parse_object(struct parser *p, enum tokty *ty) +{ + p_step(p); + struct json_value *val = json_new(json_object); + bool tail = false; + while (*ty != tt_eof && ((!tail && *ty != '}') || (tail && *ty == ','))) { + if (tail) + p_step(p); + if (*ty != tt_string) { + p_report(p, p->tok.loc, "expected string"); + goto object_leave_error_free_val; + } + struct tok key_tok = p->tok; + p_step(p); + if (*ty != ':') { + p_report(p, p->tok.loc, "expected ':'"); + goto object_leave_error_free_val; + } + p_step(p); + struct json_value *child = parser_parse(p); + if (!child) + goto object_leave_error_free_val; + json_set_sized(val, key_tok.ptr + 1, key_tok.len - 2, child); + tail = true; + } + if (*ty == tt_eof || *ty != '}') { + p_report(p, p->tok.loc, "expected '}'"); + goto object_leave_error_free_val; + } + p_step(p); + return val; +object_leave_error_free_val: + json_free(val); + return NULL; +} + +static struct json_value *parser_parse(struct parser *p) +{ + struct loc loc = p->tok.loc; + enum tokty *ty = &p->tok.ty; + + if (*ty == tt_null || *ty == tt_false || *ty == tt_true) { + struct json_value *val = json_new((enum json_type) * ty); + p_step(p); + return val; + } else if (*ty == tt_int) { + int64_t value = strtol(p->tok.ptr, NULL, 10); + struct json_value *val = json_new(json_int); + json_set_int(val, value); + p_step(p); + return val; + } else if (*ty == tt_float) { + double value = strtod(p->tok.ptr, NULL); + struct json_value *val = json_new(json_float); + json_set_float(val, value); + p_step(p); + return val; + } else if (*ty == tt_string) { + char *value = blockalloc_alloc(&p->allocator, p->tok.len - 2 + 1, 2); + strncpy(value, p->tok.ptr + 1, p->tok.len - 2); + value[p->tok.len - 2] = '\0'; + struct json_value *val = json_new(json_string); + json_set_string(val, value); + p_step(p); + return val; + } else if (*ty == '[') { + return parser_parse_array(p, ty); + } else if (*ty == '{') { + return parser_parse_object(p, ty); + } else { + p_report(p, loc, "expected expression"); + return NULL; + } +} + +struct json_value *json_parse(char const *text, size_t text_size) +{ + text_size = text_size > 0 ? text_size : strlen(text); + + struct parser p; + parser_construct(&p, text, text_size); + + return parser_parse(&p); +} diff --git a/src/json_value.c b/src/json_value.c new file mode 100644 index 0000000..5fd097e --- /dev/null +++ b/src/json_value.c @@ -0,0 +1,153 @@ +#include "collections.h" +#include "json.h" +#include +#include +#include +#include +#include +#include +#include + +struct json_value { + enum json_type ty; + union { + int64_t int_val; + double float_val; + char *string_val; + struct smallarray array_vals; + struct hashmap object_fields; + }; +}; + +struct json_value *json_new(enum json_type ty) +{ + struct json_value *v = malloc(sizeof(struct json_value)); + *v = (struct json_value) { .ty = ty }; + switch (ty) { + case json_null: + case json_false: + case json_true: + case json_int: + case json_float: + case json_string: + break; + case json_array: + smallarray_construct(&v->array_vals); + break; + case json_object: + hashmap_construct(&v->object_fields); + break; + } + return v; +} + +void json_free(struct json_value *v) +{ + switch (v->ty) { + case json_null: + case json_false: + case json_true: + case json_int: + case json_float: + break; + case json_string: + if (v->string_val) + free(v->string_val); + break; + case json_array: + smallarray_destroy(&v->array_vals); + break; + case json_object: + hashmap_destroy(&v->object_fields); + break; + } + free(v); +} + +bool json_is(struct json_value const *value, enum json_type type) +{ + return value->ty == type; +} + +bool json_get_bool(struct json_value const *value) +{ + assert(value->ty == json_true || value->ty == json_false); + return value->ty == json_true; +} +int64_t json_get_int(struct json_value const *value) +{ + assert(value->ty == json_int); + return value->int_val; +} +double json_get_float(struct json_value const *value) +{ + assert(value->ty == json_float); + return value->float_val; +} +char *json_get_string(struct json_value *value) +{ + assert(value->ty == json_string); + return value->string_val; +} + +void json_set_bool(struct json_value *value, bool val) +{ + assert(value->ty == json_true || value->ty == json_false); + value->ty = val ? json_true : json_false; +} +void json_set_int(struct json_value *value, int64_t val) +{ + assert(value->ty == json_int); + value->int_val = val; +} +void json_set_float(struct json_value *value, double val) +{ + assert(value->ty == json_float); + value->float_val = val; +} +void json_set_string(struct json_value *value, char *val) +{ + assert(value->ty == json_string); + value->string_val = val; +} + +struct json_value *json_idx(struct json_value *array, size_t idx) +{ + assert(array->ty == json_array); + return smallarray_get(&array->array_vals, idx); +} + +void json_push(struct json_value *array, struct json_value *value) +{ + assert(array->ty == json_array); + smallarray_push(&array->array_vals, value); +} + +struct json_value *json_key(struct json_value *object, char const *key) +{ + assert(object->ty == json_object); + return hashmap_get(&object->object_fields, key); +} + +struct json_value *json_key_sized( + struct json_value *object, char const *key, size_t key_size) +{ + assert(object->ty == json_object); + return hashmap_get_sized(&object->object_fields, key, key_size); +} + +void json_set( + struct json_value *object, char const *key, struct json_value *value) +{ + assert(object->ty == json_object); + hashmap_set(&object->object_fields, key, value); +} + +void json_set_sized(struct json_value *object, + char const *key, + size_t key_size, + struct json_value *value) +{ + assert(object->ty == json_object); + hashmap_set_sized(&object->object_fields, key, key_size, value); +} diff --git a/src/main.c b/src/main.c index 4b36649..11a9d34 100644 --- a/src/main.c +++ b/src/main.c @@ -1,4 +1,5 @@ #include "collections.h" +#include "json.h" #include #include #include @@ -8,197 +9,6 @@ #include #include -struct loc { - size_t idx; - int line; - int col; -}; - -void report( - struct loc loc, char const *message, char const *text, size_t text_len) -{ - fprintf(stderr, "error: %s\n", message); - if (!text) - return; - assert(text[loc.idx] != '\n'); - size_t line_begin_idx = loc.idx; - while (line_begin_idx > 0 && text[line_begin_idx] != '\n') { - line_begin_idx -= 1; - } - if (text[line_begin_idx] == '\n') { - line_begin_idx += 1; - } - size_t line_end_idx = loc.idx + 1; - while (line_end_idx < text_len && text[line_end_idx] != '\n') { - line_end_idx += 1; - } - if (line_end_idx >= text_len || text[line_end_idx] == '\n') { - line_end_idx -= 1; - } - int linenr_width = snprintf(NULL, 0, "%d", loc.line); - static char const *spaces = " "; - printf("%.*s|\n" - "%d|%.*s\n" - "%.*s|%.*s^\n" - "%.*s|\n", - linenr_width, - spaces, - loc.line, - (int)(line_end_idx - line_begin_idx + 1), - &text[line_begin_idx], - linenr_width, - spaces, - loc.col - 1, - spaces, - linenr_width, - spaces); -} - -enum tokty { - tt_eof, - tt_null, - tt_false, - tt_true, - tt_string, - tt_float, - tt_int = '0', - tt_comma = ',', - tt_colon = ':', - tt_lbracket = '[', - tt_rbracket = ']', - tt_lbrace = '{', - tt_rbrace = '}', -}; - -struct tok { - enum tokty ty; - char const *ptr; - size_t len; - struct loc loc; -}; - -struct tokenizer { - char const *text; - size_t len; - size_t idx; - int line; - int col; - bool failed; -}; - -static void t_step(struct tokenizer *t) -{ - if (t->idx >= t->len) - return; - if (t->text[t->idx] == '\n') { - t->line += 1; - t->col = 1; - } else { - t->col += 1; - } - t->idx += 1; -} - -static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc) -{ - return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc }; -} - -struct tok tokenizer_next(struct tokenizer *t) -{ - struct loc loc = { t->idx, t->line, t->col }; - size_t *i = &t->idx; - if (*i >= t->len) { - return t_tok(t, tt_eof, loc); - } - bool matched = false; - while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) { - matched = true; - *i += 1; - } - if (matched) { - return tokenizer_next(t); - } - if (strchr(",:[]{}0", t->text[*i]) != NULL) { - enum tokty ty = (enum tokty)t->text[*i]; - *i += 1; - return t_tok(t, ty, loc); - } - while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') { - matched = true; - *i += 1; - } - if (matched) { - char const *kws[] = { "null", "false", "true" }; - enum tokty tys[] = { tt_null, tt_false, tt_true }; - for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) { - if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) { - return t_tok(t, tys[kw_i], loc); - } - } - report(loc, "invalid identifier", t->text, t->len); - t->failed = true; - return tokenizer_next(t); - } - if (t->text[*i] >= '1' && t->text[*i] <= '9') { - while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') { - *i += 1; - } - enum tokty ty = tt_int; - if (*i < t->len && t->text[*i] == '.') { - ty = tt_float; - *i += 1; - while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') { - *i += 1; - } - } - return t_tok(t, ty, loc); - } - if (t->text[*i] == '\"') { - i += 1; - while (*i < t->len && t->text[*i] != '\"') { - if (t->text[*i] != '\\') { - *i += 1; - if (*i >= t->len) - break; - } - *i += 1; - } - if (*i >= t->len && t->text[*i] != '\"') { - report(loc, "malformed string", t->text, t->len); - t->failed = true; - return tokenizer_next(t); - } - *i += 1; - return t_tok(t, tt_string, loc); - } - report(loc, "illegal character", t->text, t->len); - t->failed = true; - *i += 1; - return tokenizer_next(t); -} - -enum valty { - vt_null, - vt_number, - vt_string, - vt_array, - vt_object, -}; - -struct val { - enum valty ty; - union { - int64_t int_val; - double float_val; - char *string_val; - struct { - struct val *array_data; - size_t *array_count; - }; - }; -}; - int main(int argc, char *argv[]) { if (argc < 3) { @@ -238,68 +48,16 @@ int main(int argc, char *argv[]) } fclose(file); - struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false }; - + struct json_value *val = json_parse(text, file_size); free(text); - char const *keys[] = { - "first", - "second", - "third", - "fourth", - "fifth", - "sixth", - "seventh", - "eigth", - "ninth", - "tenth", - "first", - "seventh", - "tenth", - }; - char const *values[] = { - "salery", - "policy", - "strike", - "prophecy", - "break", - "down", - "think", - "about", - "honey", - "and", - "the", - "sweet", - "sounds", - }; + typedef struct json_value V; - struct hashmap map; - hashmap_construct(&map); + V *a = json_key(val, "foo"); + V *b = json_idx(a, 0); + V *c = json_idx(a, 1); + printf(".foo[0] = %ld\n", json_get_int(b)); + printf(".foo[1] = %s\n", json_get_string(c)); - for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { - hashmap_set(&map, keys[i], (void *)values[i]); - } - - for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { - assert(hashmap_has(&map, keys[i])); - } - - for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { - switch (i) { - case 0: - assert(hashmap_get(&map, keys[i]) == values[10]); - break; - case 6: - assert(hashmap_get(&map, keys[i]) == values[11]); - break; - case 9: - assert(hashmap_get(&map, keys[i]) == values[12]); - break; - default: - assert(hashmap_get(&map, keys[i]) == values[i]); - break; - } - } - - hashmap_destroy(&map); + json_free(val); }