commit 24be8185c63f0865d1ed8c5fd69f29790927b747
Author: sfja
Date:   Wed Mar 18 23:55:33 2026 +0100

    init

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..9e0bd38
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,18 @@
+BasedOnStyle: WebKit
+IndentWidth: 4
+ColumnLimit: 80
+
+IndentCaseLabels: true
+InsertNewlineAtEOF: true
+AllowShortFunctionsOnASingleLine: None
+
+BinPackArguments: false
+BinPackLongBracedList: false
+BinPackParameters: OnePerLine
+
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+
+PointerAlignment: Right
+QualifierAlignment: Right
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..34b80eb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+
+MAKEFLAGS += -j16
+
+CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
+LDFLAGS=
+
+build_dir = build
+obj_dir = $(build_dir)/obj
+
+sources = \
+	src/main.c \
+	src/collections.c
+
+target=jq
+
+all: $(build_dir)/$(target)
+
+$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o)
+	gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
+
+$(obj_dir)/%.o: %.c
+	@mkdir -p $(dir $@)
+	gcc $< -c -o $@ -MMD -MP $(CFLAGS)
+
+clean:
+	rm -rf $(build_dir)
+
+-include $(sources:%.c=$(obj_dir)/%.d)
+
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..abc7b40
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,6 @@
+-xc
+-std=c17
+-pedantic-errors
+-Wall
+-Wextra
+-Wconversion
diff --git a/data.json b/data.json
new file mode 100644
index 0000000..81a2d3b
--- /dev/null
+++ b/data.json
@@ -0,0 +1,4 @@
+{
+    "foo": "bar"
+}
+
diff --git a/src/collections.c b/src/collections.c
new file mode 100644
index 0000000..0c05912
--- /dev/null
+++ b/src/collections.c
@@ -0,0 +1,225 @@
+#include "collections.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+enum { array_initial_capacity = 8 };
+
+// Ensures the array has room for one more element, growing it if needed.
+// Returns 0 on success. Returns -1, leaving the array untouched, if the
+// new size would overflow or the allocation fails.
+static int array_reserve_one(
+    void **data, size_t *capacity, size_t count, size_t elem_size)
+{
+    if (count < *capacity) {
+        return 0;
+    }
+    size_t new_capacity = array_initial_capacity;
+    if (*capacity > 0) {
+        if (*capacity > SIZE_MAX / 2) {
+            return -1;
+        }
+        new_capacity = *capacity * 2;
+    }
+    if (elem_size != 0 && new_capacity > SIZE_MAX / elem_size) {
+        return -1;
+    }
+    // Keep the old pointer until realloc succeeds so that a failure cannot
+    // lose the existing elements. realloc(NULL, n) behaves like malloc(n).
+    void *grown = realloc(*data, new_capacity * elem_size);
+    if (!grown) {
+        return -1;
+    }
+    *data = grown;
+    *capacity = new_capacity;
+    return 0;
+}
+
+void *array_push(void **data,
+    size_t *capacity,
+    size_t *count,
+    void const *elem,
+    size_t elem_size)
+{
+    // A NULL buffer means "nothing allocated yet", whatever the counters say.
+    if (!*data) {
+        *capacity = 0;
+        *count = 0;
+    }
+    if (array_reserve_one(data, capacity, *count, elem_size) != 0) {
+        return NULL;
+    }
+    void *slot = (unsigned char *)*data + *count * elem_size;
+    if (elem) {
+        memcpy(slot, elem, elem_size);
+    }
+    *count += 1;
+    return slot;
+}
+
+void *array_insert_at(void **data,
+    size_t *capacity,
+    size_t *count,
+    size_t idx,
+    void const *elem,
+    size_t elem_size)
+{
+    if (!*data || idx >= *count) {
+        return array_push(data, capacity, count, elem, elem_size);
+    }
+    // Grow before shifting: the shift writes one slot past the current end.
+    if (array_reserve_one(data, capacity, *count, elem_size) != 0) {
+        return NULL;
+    }
+    unsigned char *slot = (unsigned char *)*data + idx * elem_size;
+    memmove(slot + elem_size, slot, (*count - idx) * elem_size);
+    *count += 1;
+    if (elem) {
+        memcpy(slot, elem, elem_size);
+    }
+    return slot;
+}
+
+// djb2 string hash: hash = hash * 33 + c for every byte of the key.
+static uint64_t hash_key(char const *data)
+{
+    uint64_t hash = 5381;
+    unsigned char c;
+
+    while ((c = (unsigned char)*data++)) {
+        hash = ((hash << 5) + hash) + c;
+    }
+
+    return hash;
+}
+
+void hashmap_construct(struct hashmap *t)
+{
+    *t = (struct hashmap) { NULL, 0, 0 };
+}
+
+void hashmap_destroy(struct hashmap *t)
+{
+    for (size_t i = 0; i < t->buckets_count; ++i) {
+        free(t->buckets[i].entries);
+    }
+    free(t->buckets);
+    hashmap_construct(t);
+}
+
+static size_t const bucket_size = 16;
+
+// Looks up the entry stored for `hash`, or NULL if there is none. Every
+// bucket is visited, so this does not rely on bucket ranges being sorted
+// or disjoint, only on each bucket's own range covering its entries.
+static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
+{
+    for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) {
+        struct hash_bucket *bucket = &t->buckets[b_idx];
+        if (hash < bucket->first_hash || hash > bucket->last_hash) {
+            continue;
+        }
+        for (size_t e_idx = 0; e_idx < bucket->count; ++e_idx) {
+            struct hash_entry *entry = &bucket->entries[e_idx];
+            if (entry->hash == hash) {
+                return entry;
+            }
+        }
+    }
+    return NULL;
+}
+
+// Inserts an empty bucket at position `idx` and returns it, or NULL if
+// memory ran out, in which case the map is left unchanged.
+static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
+{
+    struct hash_entry *entries = malloc(bucket_size * sizeof *entries);
+    if (!entries) {
+        return NULL;
+    }
+    struct hash_bucket bucket = {
+        .entries = entries,
+        .count = 0,
+        .first_hash = 0,
+        .last_hash = 0,
+    };
+
+    // Pass a real void * rather than casting &t->buckets, which would
+    // access a struct hash_bucket * through an incompatible pointer type.
+    void *raw = t->buckets;
+    struct hash_bucket *slot = array_insert_at(&raw,
+        &t->buckets_capacity,
+        &t->buckets_count,
+        idx,
+        &bucket,
+        sizeof bucket);
+    t->buckets = raw;
+    if (!slot) {
+        free(entries);
+    }
+    return slot;
+}
+
+// Appends an entry for `hash` (value NULL) to a bucket that has room and
+// widens the bucket's hash range to cover it.
+static struct hash_entry *bucket_append(
+    struct hash_bucket *bucket, uint64_t hash)
+{
+    struct hash_entry *entry = &bucket->entries[bucket->count];
+    *entry = (struct hash_entry) { hash, NULL };
+    if (bucket->count == 0 || hash < bucket->first_hash) {
+        bucket->first_hash = hash;
+    }
+    if (bucket->count == 0 || hash > bucket->last_hash) {
+        bucket->last_hash = hash;
+    }
+    bucket->count += 1;
+    return entry;
+}
+
+// Creates the entry for a hash that is not in the map yet. Returns NULL
+// if memory ran out.
+static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
+{
+    // Find the first bucket the hash sorts before or into.
+    size_t idx = 0;
+    while (idx < t->buckets_count && hash > t->buckets[idx].last_hash) {
+        idx += 1;
+    }
+    if (idx < t->buckets_count && t->buckets[idx].count < bucket_size) {
+        return bucket_append(&t->buckets[idx], hash);
+    }
+    // That bucket is full or missing: use spare room in its left neighbour
+    // before allocating. Ranges may then overlap, which find_entry allows.
+    if (idx > 0 && t->buckets[idx - 1].count < bucket_size) {
+        return bucket_append(&t->buckets[idx - 1], hash);
+    }
+    struct hash_bucket *fresh = insert_bucket_at(t, idx);
+    if (!fresh) {
+        return NULL;
+    }
+    return bucket_append(fresh, hash);
+}
+
+void hashmap_set(struct hashmap *t, char const *key, void *value)
+{
+    uint64_t hash = hash_key(key);
+
+    struct hash_entry *entry = find_entry(t, hash);
+    if (!entry) {
+        entry = make_entry(t, hash);
+    }
+    if (!entry) {
+        // The void return type leaves no way to report failure, and
+        // silently dropping the value would be worse than stopping.
+        abort();
+    }
+    entry->value = value;
+}
+
+void *hashmap_get(struct hashmap *t, char const *key)
+{
+    struct hash_entry *entry = find_entry(t, hash_key(key));
+    return entry ? entry->value : NULL;
+}
diff --git a/src/collections.h b/src/collections.h
new file mode 100644
index 0000000..089e325
--- /dev/null
+++ b/src/collections.h
@@ -0,0 +1,63 @@
+#ifndef COLLECTIONS_H
+#define COLLECTIONS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Growable arrays are described by three caller-owned variables: `*data`
+// (the buffer, NULL until the first element is added), `*capacity` (slots
+// allocated) and `*count` (slots in use). Returned slot pointers are only
+// valid until the next call that may grow the array.
+
+// Appends one element and increments `*count`. `elem` may be NULL to leave
+// the new slot uninitialized. Returns the new slot, or NULL if memory ran
+// out, in which case the existing elements are untouched.
+void *array_push(void **data,
+    size_t *capacity,
+    size_t *count,
+    void const *elem,
+    size_t elem_size);
+
+// Inserts one element at `idx`, moving later elements up by one, and
+// increments `*count`. An `idx` at or past `*count` appends. Same return
+// value and `elem` rules as array_push.
+void *array_insert_at(void **data,
+    size_t *capacity,
+    size_t *count,
+    size_t idx,
+    void const *elem,
+    size_t elem_size);
+
+struct hash_entry {
+    uint64_t hash;
+    void *value;
+};
+
+// A group of entries whose hashes all lie in [first_hash, last_hash].
+struct hash_bucket {
+    struct hash_entry *entries;
+    size_t count;
+    uint64_t first_hash;
+    uint64_t last_hash;
+};
+
+// Map from string keys to caller-owned pointers. Entries are identified by
+// the key's 64-bit hash alone, so two keys with the same hash share a slot.
+struct hashmap {
+    struct hash_bucket *buckets;
+    size_t buckets_capacity;
+    size_t buckets_count;
+};
+
+// Initializes an empty map.
+void hashmap_construct(struct hashmap *t);
+// Frees the map's own storage (never the stored values) and resets it to
+// empty.
+void hashmap_destroy(struct hashmap *t);
+// Stores `value` under `key`, replacing any previous value. Aborts the
+// process if memory runs out.
+void hashmap_set(struct hashmap *t, char const *key, void *value);
+// Returns the value stored under `key`, or NULL if there is none.
+void *hashmap_get(struct hashmap *t, char const *key);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..99bf592
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,360 @@
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// A position in the input: byte offset plus 1-based line and column.
+struct loc {
+    size_t idx;
+    int line;
+    int col;
+};
+
+// Prints `message` to stderr and, when `text` is given, the source line
+// containing `loc` with a caret under the reported column.
+void report(
+    struct loc loc, char const *message, char const *text, size_t text_len)
+{
+    fprintf(stderr, "error: %s\n", message);
+    if (!text || loc.idx >= text_len) {
+        return;
+    }
+    assert(text[loc.idx] != '\n');
+
+    size_t line_begin_idx = loc.idx;
+    while (line_begin_idx > 0 && text[line_begin_idx - 1] != '\n') {
+        line_begin_idx -= 1;
+    }
+    size_t line_end_idx = loc.idx;
+    while (line_end_idx < text_len && text[line_end_idx] != '\n') {
+        line_end_idx += 1;
+    }
+
+    // Pad with "%*s" and an empty string so that the gutter and the caret
+    // offset are not capped by the length of a fixed run of spaces.
+    int linenr_width = snprintf(NULL, 0, "%d", loc.line);
+    int caret_pad = loc.col > 1 ? loc.col - 1 : 0;
+    fprintf(stderr,
+        "%*s|\n"
+        "%d|%.*s\n"
+        "%*s|%*s^\n"
+        "%*s|\n",
+        linenr_width,
+        "",
+        loc.line,
+        (int)(line_end_idx - line_begin_idx),
+        &text[line_begin_idx],
+        linenr_width,
+        "",
+        caret_pad,
+        "",
+        linenr_width,
+        "");
+}
+
+enum tokty {
+    tt_eof,
+    tt_null,
+    tt_false,
+    tt_true,
+    tt_string,
+    tt_float,
+    // The remaining kinds reuse the character that introduces them.
+    tt_int = '0',
+    tt_comma = ',',
+    tt_colon = ':',
+    tt_lbracket = '[',
+    tt_rbracket = ']',
+    tt_lbrace = '{',
+    tt_rbrace = '}',
+};
+
+// A token: its kind, the slice of input it covers, and where it starts.
+struct tok {
+    enum tokty ty;
+    char const *ptr;
+    size_t len;
+    struct loc loc;
+};
+
+struct tokenizer {
+    char const *text;
+    size_t len;
+    size_t idx;
+    int line;
+    int col;
+    bool failed; // set once a lexical error has been reported
+};
+
+// Advances one character, keeping line and column in step. Does nothing
+// at the end of the input.
+static void t_step(struct tokenizer *t)
+{
+    if (t->idx >= t->len) {
+        return;
+    }
+    if (t->text[t->idx] == '\n') {
+        t->line += 1;
+        t->col = 1;
+    } else {
+        t->col += 1;
+    }
+    t->idx += 1;
+}
+
+// Returns the current character, or '\0' at the end of the input.
+static char t_peek(struct tokenizer const *t)
+{
+    return t->idx < t->len ? t->text[t->idx] : '\0';
+}
+
+static bool is_space(char c)
+{
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+static bool is_digit(char c)
+{
+    return c >= '0' && c <= '9';
+}
+
+static void t_skip_digits(struct tokenizer *t)
+{
+    while (is_digit(t_peek(t))) {
+        t_step(t);
+    }
+}
+
+static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
+{
+    return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
+}
+
+// Consumes a JSON number starting at the current '-' or digit and stores
+// its kind in `*ty`. Returns false if the text is not a valid number; at
+// least one character is consumed either way.
+static bool t_number(struct tokenizer *t, enum tokty *ty)
+{
+    *ty = tt_int;
+    if (t_peek(t) == '-') {
+        t_step(t);
+    }
+    if (t_peek(t) == '0') {
+        // A leading zero stands alone, as in the JSON grammar.
+        t_step(t);
+    } else if (is_digit(t_peek(t))) {
+        t_skip_digits(t);
+    } else {
+        return false;
+    }
+    if (t_peek(t) == '.') {
+        *ty = tt_float;
+        t_step(t);
+        if (!is_digit(t_peek(t))) {
+            return false;
+        }
+        t_skip_digits(t);
+    }
+    if (t_peek(t) == 'e' || t_peek(t) == 'E') {
+        *ty = tt_float;
+        t_step(t);
+        if (t_peek(t) == '+' || t_peek(t) == '-') {
+            t_step(t);
+        }
+        if (!is_digit(t_peek(t))) {
+            return false;
+        }
+        t_skip_digits(t);
+    }
+    return true;
+}
+
+// Returns the next token, or a tt_eof token at the end of the input.
+// Lexical errors are reported, recorded in `t->failed`, and skipped.
+struct tok tokenizer_next(struct tokenizer *t)
+{
+    static char const *const kws[] = { "null", "false", "true" };
+    static enum tokty const tys[] = { tt_null, tt_false, tt_true };
+    size_t const kw_count = sizeof(kws) / sizeof(kws[0]);
+
+    for (;;) {
+        // Step instead of bumping idx so line and column stay accurate.
+        while (t->idx < t->len && is_space(t->text[t->idx])) {
+            t_step(t);
+        }
+        struct loc loc = { t->idx, t->line, t->col };
+        if (t->idx >= t->len) {
+            return t_tok(t, tt_eof, loc);
+        }
+        char c = t->text[t->idx];
+
+        // strchr also matches the terminator, so rule out '\0' first.
+        if (c != '\0' && strchr(",:[]{}", c) != NULL) {
+            t_step(t);
+            return t_tok(t, (enum tokty)c, loc);
+        }
+        if (c == '-' || is_digit(c)) {
+            enum tokty ty;
+            if (t_number(t, &ty)) {
+                return t_tok(t, ty, loc);
+            }
+            report(loc, "malformed number", t->text, t->len);
+            t->failed = true;
+            continue;
+        }
+        if (c >= 'a' && c <= 'z') {
+            while (t_peek(t) >= 'a' && t_peek(t) <= 'z') {
+                t_step(t);
+            }
+            // Compare lengths too: a bare strncmp would accept prefixes.
+            size_t len = t->idx - loc.idx;
+            for (size_t kw_i = 0; kw_i < kw_count; ++kw_i) {
+                if (len == strlen(kws[kw_i])
+                    && memcmp(kws[kw_i], &t->text[loc.idx], len) == 0) {
+                    return t_tok(t, tys[kw_i], loc);
+                }
+            }
+            report(loc, "invalid identifier", t->text, t->len);
+            t->failed = true;
+            continue;
+        }
+        if (c == '"') {
+            t_step(t);
+            while (t->idx < t->len && t->text[t->idx] != '"') {
+                if (t->text[t->idx] == '\\') {
+                    // Skip the backslash so that an escaped quote does
+                    // not end the string.
+                    t_step(t);
+                }
+                t_step(t);
+            }
+            if (t->idx >= t->len) {
+                report(loc, "malformed string", t->text, t->len);
+                t->failed = true;
+                continue;
+            }
+            t_step(t);
+            return t_tok(t, tt_string, loc);
+        }
+        report(loc, "illegal character", t->text, t->len);
+        t->failed = true;
+        t_step(t);
+    }
+}
+
+enum valty {
+    vt_null,
+    vt_number,
+    vt_string,
+    vt_array,
+    vt_object,
+};
+
+struct val {
+    enum valty ty;
+    union {
+        int64_t int_val;
+        double float_val;
+        char *string_val;
+        struct {
+            struct val *array_data;
+            // NOTE(review): a pointer to the count looks unintended;
+            // confirm whether this should be a plain size_t.
+            size_t *array_count;
+        };
+    };
+};
+
+// Reads the whole file into a freshly allocated, NUL-terminated buffer
+// and stores its length (excluding the terminator) in `*len_out`. Prints
+// an error and returns NULL on failure. The caller frees the result.
+static char *read_file(char const *filename, size_t *len_out)
+{
+    // Binary mode, so the size from ftell matches what fread delivers.
+    FILE *file = fopen(filename, "rb");
+    if (!file) {
+        fprintf(stderr,
+            "error: could not open file (%s) \"%s\"\n",
+            strerror(errno),
+            filename);
+        return NULL;
+    }
+
+    char *text = NULL;
+    size_t file_size = 0;
+    long ftell_result = -1;
+
+    if (fseek(file, 0, SEEK_END) != 0) {
+        fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
+        goto fail;
+    }
+    ftell_result = ftell(file);
+    if (ftell_result < 0) {
+        fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
+        goto fail;
+    }
+    // Rewind: reading from the end-of-file position would return nothing.
+    if (fseek(file, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
+        goto fail;
+    }
+
+    file_size = (size_t)ftell_result;
+    text = malloc(file_size + 1);
+    if (!text) {
+        fprintf(stderr, "error: could not allocate (%s)\n", strerror(errno));
+        goto fail;
+    }
+    if (fread(text, 1, file_size, file) != file_size) {
+        fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
+        goto fail;
+    }
+    text[file_size] = '\0';
+
+    fclose(file);
+    *len_out = file_size;
+    return text;
+
+fail:
+    free(text);
+    fclose(file);
+    return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+    if (argc < 3) {
+        fprintf(stderr, "error: incorrect arguments\n");
+        return EXIT_FAILURE;
+    }
+    char const *pattern = argv[1];
+    char const *filename = argv[2];
+
+    size_t file_size = 0;
+    char *text = read_file(filename, &file_size);
+    if (!text) {
+        return EXIT_FAILURE;
+    }
+
+    struct tokenizer tokenizer = {
+        .text = text,
+        .len = file_size,
+        .idx = 0,
+        .line = 1,
+        .col = 1,
+        .failed = false,
+    };
+
+    // Neither is consumed yet; the casts keep -Wall -Wextra quiet.
+    (void)pattern;
+    (void)tokenizer;
+
+    printf("hello\n");
+
+    free(text);
+    return EXIT_SUCCESS;
+}