commit 24be8185c63f0865d1ed8c5fd69f29790927b747
Author: sfja <sfja2004@gmail.com>
Date:   Wed Mar 18 23:55:33 2026 +0100

    init

diff --git a/.clang-format b/.clang-format
new file mode 100644
index 0000000..9e0bd38
--- /dev/null
+++ b/.clang-format
@@ -0,0 +1,18 @@
+BasedOnStyle: WebKit
+IndentWidth: 4
+ColumnLimit: 80
+
+IndentCaseLabels: true
+InsertNewlineAtEOF: true
+AllowShortFunctionsOnASingleLine: None
+
+BinPackArguments: false
+BinPackLongBracedList: false
+BinPackParameters: OnePerLine
+
+AllowAllArgumentsOnNextLine: true
+AllowAllParametersOfDeclarationOnNextLine: true
+
+PointerAlignment: Right
+QualifierAlignment: Right
+
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..567609b
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1 @@
+build/
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..34b80eb
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,29 @@
+
+# Build with up to 16 parallel jobs by default.
+MAKEFLAGS += -j16
+
+# Strict C17 with AddressSanitizer. CFLAGS is also passed on the link line so
+# the sanitizer runtime gets linked in.
+CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
+LDFLAGS=
+
+build_dir = build
+obj_dir = $(build_dir)/obj
+
+sources = \
+	src/main.c \
+	src/collections.c
+
+target=jq
+
+# `all` and `clean` name actions, not files: without .PHONY a stray file called
+# "all" or "clean" would make these targets look up to date and never run.
+.PHONY: all clean
+
+all: $(build_dir)/$(target)
+
+# Link every object file into the final executable.
+$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o)
+	gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
+
+# Compile one source file; -MMD -MP writes a .d dependency file next to the
+# object so header changes trigger a rebuild.
+$(obj_dir)/%.o: %.c
+	@mkdir -p $(dir $@)
+	gcc $< -c -o $@ -MMD -MP $(CFLAGS)
+
+clean:
+	rm -rf $(build_dir)
+
+# Pull in the generated dependency files; the leading '-' ignores missing ones.
+-include $(sources:%.c=$(obj_dir)/%.d)
+
diff --git a/compile_flags.txt b/compile_flags.txt
new file mode 100644
index 0000000..abc7b40
--- /dev/null
+++ b/compile_flags.txt
@@ -0,0 +1,6 @@
+-xc
+-std=c17
+-pedantic-errors
+-Wall
+-Wextra
+-Wconversion
diff --git a/data.json b/data.json
new file mode 100644
index 0000000..81a2d3b
--- /dev/null
+++ b/data.json
@@ -0,0 +1,4 @@
+{
+    "foo": "bar"
+}
+
diff --git a/src/collections.c b/src/collections.c
new file mode 100644
index 0000000..0c05912
--- /dev/null
+++ b/src/collections.c
@@ -0,0 +1,152 @@
+#include "collections.h"
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+
+// Appends one element to a growable array and returns a pointer to its slot.
+//
+// `*data` is the array storage (NULL means "nothing allocated yet", in which
+// case `*capacity` and `*count` are reset), `*capacity` is the number of
+// element slots allocated and `*count` the number in use. `elem` points to
+// `elem_size` bytes to copy into the new slot; it may be NULL, in which case
+// the slot is left uninitialized for the caller to fill in. `elem_size` must
+// be non-zero.
+//
+// On success `*count` is incremented and the slot's address is returned. On
+// allocation failure NULL is returned and the array is left untouched.
+void *array_push(void **data,
+    size_t *capacity,
+    size_t *count,
+    void const *elem,
+    size_t elem_size)
+{
+    if (!*data) {
+        *capacity = 0;
+        *count = 0;
+    }
+    // Grow while one spare slot is still left, doubling from an initial 8.
+    if (*count + 1 >= *capacity) {
+        size_t new_capacity = *capacity ? *capacity * 2 : 8;
+        if (elem_size != 0 && new_capacity > SIZE_MAX / elem_size) {
+            return NULL; // byte size would overflow size_t
+        }
+        // realloc(NULL, n) acts like malloc(n). Keep the old pointer until the
+        // call is known to have succeeded so nothing leaks on failure.
+        void *new_data = realloc(*data, new_capacity * elem_size);
+        if (!new_data) {
+            return NULL;
+        }
+        *data = new_data;
+        *capacity = new_capacity;
+    }
+    void *ptr = &((unsigned char *)*data)[*count * elem_size];
+    if (elem) {
+        memcpy(ptr, elem, elem_size);
+    }
+    *count += 1;
+    return ptr;
+}
+
+// Inserts one element at index `idx` of a growable array, shifting the
+// elements at `idx` and above up by one slot, and returns a pointer to the
+// inserted slot.
+//
+// `data`, `capacity`, `count`, `elem` and `elem_size` have the same meaning as
+// for array_push(). An `idx` at or past `*count` appends via array_push().
+// `elem` may be NULL to leave the slot's bytes for the caller to fill in.
+//
+// Returns NULL, leaving the array untouched, if the storage has to grow and
+// the allocation fails.
+void *array_insert_at(void **data,
+    size_t *capacity,
+    size_t *count,
+    size_t idx,
+    void const *elem,
+    size_t elem_size)
+{
+    if (idx >= *count) {
+        return array_push(data, capacity, count, elem, elem_size);
+    }
+    // The shift below writes slot `*count`, so make sure it exists. Uses the
+    // same growth policy as array_push(): double while one slot is spare.
+    if (*count + 1 >= *capacity) {
+        size_t new_capacity = *capacity ? *capacity * 2 : 8;
+        if (elem_size != 0 && new_capacity > SIZE_MAX / elem_size) {
+            return NULL; // byte size would overflow size_t
+        }
+        void *new_data = realloc(*data, new_capacity * elem_size);
+        if (!new_data) {
+            return NULL;
+        }
+        *data = new_data;
+        *capacity = new_capacity;
+    }
+    // Compute the addresses only after a possible realloc moved the storage.
+    void *src_ptr = &((unsigned char *)*data)[idx * elem_size];
+    void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size];
+    memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
+    *count += 1;
+    if (elem) {
+        memcpy(src_ptr, elem, elem_size);
+    }
+    return src_ptr;
+}
+
+// Hashes a NUL-terminated string with djb2: start at 5381 and fold each byte
+// in as `hash * 33 + byte`, wrapping modulo 2^64.
+static uint64_t hash_key(char const *data)
+{
+    uint64_t acc = 5381;
+
+    // Walk the bytes as unsigned so characters above 0x7f are not
+    // sign-extended.
+    for (unsigned char const *p = (unsigned char const *)data; *p != 0; ++p) {
+        acc = acc * 33 + *p;
+    }
+
+    return acc;
+}
+
+// Puts `t` into the empty state: no bucket storage, zero capacity and zero
+// buckets.
+void hashmap_construct(struct hashmap *t)
+{
+    t->buckets = NULL;
+    t->buckets_capacity = 0;
+    t->buckets_count = 0;
+}
+
+// Number of entry slots allocated for every bucket; a bucket whose `count`
+// reaches this value is treated as full.
+static size_t const bucket_size = 16;
+
+// Looks for an entry whose stored hash equals `hash`.
+//
+// Buckets are visited in array order. A bucket is scanned only when `hash`
+// lies inside its [first_hash, last_hash] range. Returns the first matching
+// entry, or NULL when no bucket holds one.
+static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
+{
+    size_t bucket_idx = 0;
+    while (bucket_idx < t->buckets_count) {
+        struct hash_bucket *candidate = &t->buckets[bucket_idx];
+        bucket_idx += 1;
+
+        int const in_range
+            = hash >= candidate->first_hash && hash <= candidate->last_hash;
+        if (in_range) {
+            for (size_t slot = 0; slot < candidate->count; ++slot) {
+                if (candidate->entries[slot].hash == hash) {
+                    return &candidate->entries[slot];
+                }
+            }
+        }
+    }
+    return NULL;
+}
+
+// Creates an empty bucket with room for `bucket_size` entries and inserts it
+// at position `idx` of the map's bucket array.
+//
+// The new bucket has `count == 0` and a zeroed hash range; the caller is
+// expected to fill those in. Returns a pointer to the bucket inside
+// `t->buckets`, or NULL if allocating the entries or inserting the bucket
+// fails. Pointers to existing buckets may be invalidated by this call.
+static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
+{
+    // calloc checks the size multiplication and zero-fills the slots.
+    struct hash_entry *entries = calloc(bucket_size, sizeof *entries);
+    if (!entries) {
+        return NULL;
+    }
+
+    struct hash_bucket bucket = {
+        .entries = entries,
+        .count = 0,
+        .first_hash = 0,
+        .last_hash = 0,
+    };
+
+    struct hash_bucket *slot = array_insert_at((void **)&t->buckets,
+        &t->buckets_capacity,
+        &t->buckets_count,
+        idx,
+        &bucket,
+        sizeof(struct hash_bucket));
+    if (!slot) {
+        // The bucket never made it into the map, so nobody else owns entries.
+        free(entries);
+    }
+    return slot;
+}
+
+// Takes the next free slot of `bucket` for `hash` and widens the bucket's
+// [first_hash, last_hash] range to cover it. The bucket must not be full.
+// The entry's value starts out as NULL.
+static struct hash_entry *append_to_bucket(
+    struct hash_bucket *bucket, uint64_t hash)
+{
+    if (bucket->count == 0) {
+        bucket->first_hash = hash;
+        bucket->last_hash = hash;
+    } else {
+        if (hash < bucket->first_hash) {
+            bucket->first_hash = hash;
+        }
+        if (hash > bucket->last_hash) {
+            bucket->last_hash = hash;
+        }
+    }
+    struct hash_entry *entry = &bucket->entries[bucket->count];
+    bucket->count += 1;
+    entry->hash = hash;
+    entry->value = NULL;
+    return entry;
+}
+
+// Creates a new entry for `hash` and returns it with its hash already set.
+//
+// The entry goes into the first bucket whose range ends at or after `hash`.
+// If that bucket is full, a fresh single-entry bucket is inserted in front of
+// it. A hash larger than every range goes into the last bucket if it has
+// room, otherwise into a new bucket appended at the end. Every entry stays
+// inside its bucket's [first_hash, last_hash] range, which is what
+// find_entry() relies on.
+//
+// The caller must have checked that no entry for `hash` exists yet.
+// Returns NULL if a bucket had to be allocated and the allocation failed.
+static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
+{
+    for (size_t i = 0; i < t->buckets_count; ++i) {
+        struct hash_bucket *bucket = &t->buckets[i];
+        if (bucket->count != 0 && hash > bucket->last_hash) {
+            continue; // belongs to a later bucket
+        }
+        if (bucket->count < bucket_size) {
+            return append_to_bucket(bucket, hash);
+        }
+        // insert_bucket_at may move t->buckets, so use only its result.
+        bucket = insert_bucket_at(t, i);
+        return bucket ? append_to_bucket(bucket, hash) : NULL;
+    }
+
+    // `hash` is larger than every existing range, or there are no buckets.
+    if (t->buckets_count > 0) {
+        struct hash_bucket *last = &t->buckets[t->buckets_count - 1];
+        if (last->count < bucket_size) {
+            return append_to_bucket(last, hash);
+        }
+    }
+    struct hash_bucket *bucket = insert_bucket_at(t, t->buckets_count);
+    return bucket ? append_to_bucket(bucket, hash) : NULL;
+}
+
+// Stores `value` under `key`, replacing any value stored earlier.
+//
+// Entries are identified only by the 64-bit hash of the key, so two keys with
+// the same hash share one entry. The map stores the `value` pointer as is and
+// does not copy `key`.
+//
+// The function cannot report errors: if memory for a new bucket or entry
+// cannot be allocated, the map is left unchanged.
+void hashmap_set(struct hashmap *t, char const *key, void *value)
+{
+    uint64_t hash = hash_key(key);
+
+    if (t->buckets_count == 0) {
+        struct hash_bucket *bucket = insert_bucket_at(t, 0);
+        if (!bucket) {
+            return;
+        }
+        // Register the entry with the bucket so find_entry() can see it: the
+        // bucket starts with count 0 and a zeroed hash range.
+        bucket->entries[0] = (struct hash_entry) { hash, value };
+        bucket->count = 1;
+        bucket->first_hash = hash;
+        bucket->last_hash = hash;
+        return;
+    }
+
+    struct hash_entry *entry = find_entry(t, hash);
+    if (!entry) {
+        entry = make_entry(t, hash);
+        if (!entry) {
+            return;
+        }
+        // Ensure the new entry is findable whichever path created it.
+        entry->hash = hash;
+    }
+    entry->value = value;
+}
+
+// Returns the value stored under `key`, or NULL if the map has no entry for
+// it. A NULL result is also what a key explicitly mapped to NULL yields.
+//
+// Lookup compares only the 64-bit hash of the key.
+void *hashmap_get(struct hashmap *t, char const *key)
+{
+    uint64_t hash = hash_key(key);
+    struct hash_entry *entry = find_entry(t, hash);
+    return entry ? entry->value : NULL;
+}
diff --git a/src/collections.h b/src/collections.h
new file mode 100644
index 0000000..089e325
--- /dev/null
+++ b/src/collections.h
@@ -0,0 +1,43 @@
+#ifndef COLLECTIONS_H
+#define COLLECTIONS_H
+
+#include <stddef.h>
+#include <stdint.h>
+
+// Writes one element into the slot at index `*count` of a growable array,
+// allocating or enlarging the storage first when needed.
+//
+// data:      in/out pointer to the array storage; NULL means nothing has
+//            been allocated yet.
+// capacity:  in/out number of element slots allocated.
+// count:     in/out number of element slots in use.
+// elem:      `elem_size` bytes to copy into the slot, or NULL to leave the
+//            slot's bytes unwritten.
+// elem_size: size of one element in bytes.
+//
+// Returns a pointer to the slot inside `*data`.
+void *array_push(void **data,
+    size_t *capacity,
+    size_t *count,
+    void const *elem,
+    size_t elem_size);
+
+// Inserts one element at index `idx` of a growable array: the elements at
+// `idx` and above are moved up by one slot and `*count` is incremented.
+// When `idx` is at or past `*count` the call is forwarded to array_push().
+//
+// data, capacity, count, elem, elem_size: as for array_push().
+// idx: position the new element should occupy.
+//
+// Returns a pointer to the slot at `idx` inside `*data`.
+void *array_insert_at(void **data,
+    size_t *capacity,
+    size_t *count,
+    size_t idx,
+    void const *elem,
+    size_t elem_size);
+
+// One key/value association. The key itself is not stored, only its hash.
+struct hash_entry {
+    uint64_t hash; // hash of the key; lookups compare this value
+    void *value; // caller-supplied pointer, stored as is
+};
+
+// A fixed-size group of entries together with the hash range it covers.
+// Lookups skip a bucket when the hash lies outside
+// [first_hash, last_hash].
+struct hash_bucket {
+    struct hash_entry *entries; // heap-allocated array of entry slots
+    size_t count; // number of slots of `entries` in use
+    uint64_t first_hash; // lower bound of the covered hash range
+    uint64_t last_hash; // upper bound of the covered hash range
+};
+
+// A map from string keys (identified by their hash) to `void *` values,
+// stored as a growable array of buckets.
+struct hashmap {
+    struct hash_bucket *buckets; // bucket array; NULL while empty
+    size_t buckets_capacity; // bucket slots allocated
+    size_t buckets_count; // bucket slots in use
+};
+
+// Initializes `t` to the empty state (no buckets allocated).
+void hashmap_construct(struct hashmap *t);
+// Presumably releases the storage owned by `t` -- TODO confirm against the
+// definition.
+void hashmap_destroy(struct hashmap *t);
+// Associates `value` with `key`. Entries are matched by the hash of `key`.
+void hashmap_set(struct hashmap *t, char const *key, void *value);
+// Looks up the value associated with `key` -- presumably NULL when there is
+// none; TODO confirm against the definition.
+void *hashmap_get(struct hashmap *t, char const *key);
+
+#endif
diff --git a/src/main.c b/src/main.c
new file mode 100644
index 0000000..99bf592
--- /dev/null
+++ b/src/main.c
@@ -0,0 +1,240 @@
+#include <assert.h>
+#include <errno.h>
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+// A position in the input text.
+struct loc {
+    size_t idx; // byte offset into the text
+    int line; // line number; the tokenizer in main() starts counting at 1
+    int col; // column number; the tokenizer in main() starts counting at 1
+};
+
+// Prints an error message to stderr, followed by the offending source line
+// with a caret under the reported column.
+//
+// loc:      position the error refers to; `loc.idx` must not point at a
+//           newline character.
+// message:  text printed after "error: ".
+// text:     the whole input, or NULL to print only the message.
+// text_len: number of bytes in `text`.
+//
+// The snippet is skipped when `text` is NULL or `loc.idx` lies outside it.
+void report(
+    struct loc loc, char const *message, char const *text, size_t text_len)
+{
+    fprintf(stderr, "error: %s\n", message);
+    if (!text || loc.idx >= text_len)
+        return;
+    assert(text[loc.idx] != '\n');
+
+    // Find the line containing loc.idx as the range [begin, end).
+    size_t line_begin_idx = loc.idx;
+    while (line_begin_idx > 0 && text[line_begin_idx - 1] != '\n') {
+        line_begin_idx -= 1;
+    }
+    size_t line_end_idx = loc.idx;
+    while (line_end_idx < text_len && text[line_end_idx] != '\n') {
+        line_end_idx += 1;
+    }
+
+    // A negative width would make "%*s" left-justify, so clamp both pads.
+    int linenr_width = snprintf(NULL, 0, "%d", loc.line);
+    if (linenr_width < 0) {
+        linenr_width = 0;
+    }
+    int caret_pad = loc.col > 1 ? loc.col - 1 : 0;
+
+    // "%*s" with an empty string pads with exactly `width` spaces, so the
+    // gutter and caret stay aligned for any line number and column. The
+    // snippet goes to stderr, the same stream as the message above.
+    fprintf(stderr,
+        "%*s|\n"
+        "%d|%.*s\n"
+        "%*s|%*s^\n"
+        "%*s|\n",
+        linenr_width,
+        "",
+        loc.line,
+        (int)(line_end_idx - line_begin_idx),
+        &text[line_begin_idx],
+        linenr_width,
+        "",
+        caret_pad,
+        "",
+        linenr_width,
+        "");
+}
+
+// Token kinds produced by the tokenizer.
+//
+// The first enumerators take the values 0..5. The punctuation kinds, and
+// tt_int, use a character code as their value, which lets the tokenizer cast
+// the input character straight to a token kind.
+enum tokty {
+    tt_eof, // end of input
+    tt_null, // keyword `null`
+    tt_false, // keyword `false`
+    tt_true, // keyword `true`
+    tt_string, // double-quoted string
+    tt_float, // number with a fractional part
+    tt_int = '0', // number without a fractional part
+    tt_comma = ',',
+    tt_colon = ':',
+    tt_lbracket = '[',
+    tt_rbracket = ']',
+    tt_lbrace = '{',
+    tt_rbrace = '}',
+};
+
+// One token: its kind plus the slice of the input text it was read from.
+struct tok {
+    enum tokty ty; // token kind
+    char const *ptr; // start of the token inside the tokenizer's text
+    size_t len; // length of the token in bytes (the slice is not terminated)
+    struct loc loc; // position of the token's first byte
+};
+
+// Tokenizer state: the input text and the current read position.
+struct tokenizer {
+    char const *text; // input text
+    size_t len; // number of bytes in `text`
+    size_t idx; // offset of the next byte to read
+    int line; // line of the next byte to read
+    int col; // column of the next byte to read
+    bool failed; // set once any tokenizer error has been reported
+};
+
+// Advances the tokenizer by one byte and keeps line/col in sync: stepping
+// over a newline starts the next line at column 1, any other byte moves one
+// column to the right. Does nothing at the end of the input.
+static void t_step(struct tokenizer *t)
+{
+    if (t->idx < t->len) {
+        bool const at_newline = t->text[t->idx] == '\n';
+        t->idx += 1;
+        if (at_newline) {
+            t->line += 1;
+            t->col = 1;
+        } else {
+            t->col += 1;
+        }
+    }
+}
+
+// Builds a token of kind `ty` covering the text from `loc.idx` up to, but not
+// including, the tokenizer's current position.
+static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
+{
+    struct tok token = {
+        .ty = ty,
+        .ptr = t->text + loc.idx,
+        .len = t->idx - loc.idx,
+        .loc = loc,
+    };
+    return token;
+}
+
+// Reports whether `c` is one of the whitespace characters skipped between
+// tokens.
+static bool t_is_space(char c)
+{
+    return c == ' ' || c == '\t' || c == '\r' || c == '\n';
+}
+
+// Reports whether `c` is an ASCII decimal digit.
+static bool t_is_digit(char c)
+{
+    return c >= '0' && c <= '9';
+}
+
+// Reads and returns the next token from `t`.
+//
+// Whitespace is skipped. Recognized tokens are the punctuation `, : [ ] { }`,
+// the keywords `null`, `false` and `true`, numbers (digits with an optional
+// `.fraction`; a leading `0` is a number on its own) and double-quoted
+// strings in which a backslash escapes the following byte. At the end of the
+// input a tt_eof token is returned, and keeps being returned on later calls.
+//
+// Errors (unknown word, unterminated string, illegal character) are printed
+// with report(), set `t->failed`, and tokenizing resumes after the offending
+// text, so the function always returns a token.
+//
+// All advancing goes through t_step() so that token locations carry correct
+// line and column numbers.
+struct tok tokenizer_next(struct tokenizer *t)
+{
+    for (;;) {
+        while (t->idx < t->len && t_is_space(t->text[t->idx])) {
+            t_step(t);
+        }
+
+        struct loc loc = { t->idx, t->line, t->col };
+        if (t->idx >= t->len) {
+            return t_tok(t, tt_eof, loc);
+        }
+        char c = t->text[t->idx];
+
+        // strchr also matches the terminating NUL of its first argument, so
+        // a NUL byte in the input must be excluded explicitly.
+        if (c != '\0' && strchr(",:[]{}", c) != NULL) {
+            t_step(t);
+            return t_tok(t, (enum tokty)c, loc);
+        }
+
+        if (c >= 'a' && c <= 'z') {
+            while (t->idx < t->len && t->text[t->idx] >= 'a'
+                && t->text[t->idx] <= 'z') {
+                t_step(t);
+            }
+            static char const *const kws[] = { "null", "false", "true" };
+            static enum tokty const tys[] = { tt_null, tt_false, tt_true };
+            size_t word_len = t->idx - loc.idx;
+            for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]);
+                ++kw_i) {
+                // Require equal length as well as equal bytes so that a mere
+                // prefix such as "nu" is not accepted as "null".
+                if (strlen(kws[kw_i]) == word_len
+                    && memcmp(kws[kw_i], &t->text[loc.idx], word_len) == 0) {
+                    return t_tok(t, tys[kw_i], loc);
+                }
+            }
+            report(loc, "invalid identifier", t->text, t->len);
+            t->failed = true;
+            continue;
+        }
+
+        if (t_is_digit(c)) {
+            t_step(t);
+            // A leading zero stands alone; other numbers take all digits.
+            if (c != '0') {
+                while (t->idx < t->len && t_is_digit(t->text[t->idx])) {
+                    t_step(t);
+                }
+            }
+            enum tokty ty = tt_int;
+            if (t->idx < t->len && t->text[t->idx] == '.') {
+                ty = tt_float;
+                t_step(t);
+                while (t->idx < t->len && t_is_digit(t->text[t->idx])) {
+                    t_step(t);
+                }
+            }
+            return t_tok(t, ty, loc);
+        }
+
+        if (c == '\"') {
+            t_step(t); // opening quote
+            while (t->idx < t->len && t->text[t->idx] != '\"') {
+                if (t->text[t->idx] == '\\') {
+                    t_step(t); // the backslash; the escaped byte follows
+                    if (t->idx >= t->len)
+                        break;
+                }
+                t_step(t);
+            }
+            if (t->idx >= t->len) {
+                // Ran off the end without finding the closing quote.
+                report(loc, "malformed string", t->text, t->len);
+                t->failed = true;
+                continue;
+            }
+            t_step(t); // closing quote
+            return t_tok(t, tt_string, loc);
+        }
+
+        report(loc, "illegal character", t->text, t->len);
+        t->failed = true;
+        t_step(t);
+    }
+}
+
+// Kinds of value a `struct val` can hold.
+enum valty {
+    vt_null,
+    vt_number,
+    vt_string,
+    vt_array,
+    vt_object,
+};
+
+// A value tagged with its kind; `ty` tells which union member is meaningful.
+struct val {
+    enum valty ty;
+    union {
+        int64_t int_val;
+        double float_val;
+        char *string_val;
+        struct {
+            struct val *array_data;
+            // NOTE(review): this is a pointer to a count, not a count;
+            // confirm whether plain `size_t` was intended.
+            size_t *array_count;
+        };
+    };
+};
+
+// Reads the whole file `filename` into a freshly allocated buffer.
+//
+// On success returns the buffer, NUL-terminated for convenience, and stores
+// the number of bytes read (excluding the terminator) in `*size_out`; the
+// caller frees it. On failure prints an error to stderr and returns NULL.
+static char *read_whole_file(char const *filename, size_t *size_out)
+{
+    // Binary mode: the size reported by ftell must match what fread returns,
+    // which text mode does not guarantee on every platform.
+    FILE *file = fopen(filename, "rb");
+    if (!file) {
+        fprintf(stderr,
+            "error: could not open file (%s) \"%s\"\n",
+            strerror(errno),
+            filename);
+        return NULL;
+    }
+
+    char *text = NULL;
+    long end_pos = -1;
+
+    if (fseek(file, 0, SEEK_END) != 0) {
+        fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
+        goto out;
+    }
+    end_pos = ftell(file);
+    if (end_pos < 0) {
+        fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
+        goto out;
+    }
+    // Go back to the start; otherwise fread would begin at end-of-file and
+    // read nothing.
+    if (fseek(file, 0, SEEK_SET) != 0) {
+        fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
+        goto out;
+    }
+
+    text = malloc((size_t)end_pos + 1);
+    if (!text) {
+        fprintf(stderr, "error: could not allocate (%s)\n", strerror(errno));
+        goto out;
+    }
+    if (fread(text, 1, (size_t)end_pos, file) != (size_t)end_pos) {
+        fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
+        free(text);
+        text = NULL;
+        goto out;
+    }
+    text[end_pos] = '\0';
+    *size_out = (size_t)end_pos;
+
+out:
+    fclose(file);
+    return text;
+}
+
+// Entry point. Usage: <program> <pattern> <file>.
+//
+// Loads the file and sets up a tokenizer over its contents; the pattern and
+// the tokenizer are not used yet. Returns EXIT_FAILURE on wrong arguments or
+// when the file cannot be read.
+int main(int argc, char *argv[])
+{
+    if (argc < 3) {
+        fprintf(stderr, "error: incorrect arguments\n");
+        return EXIT_FAILURE;
+    }
+    char const *pattern = argv[1];
+    char const *filename = argv[2];
+    (void)pattern; // not used yet
+
+    size_t file_size = 0;
+    char *text = read_whole_file(filename, &file_size);
+    if (!text) {
+        return EXIT_FAILURE;
+    }
+
+    struct tokenizer tokenizer = {
+        .text = text,
+        .len = file_size,
+        .idx = 0,
+        .line = 1,
+        .col = 1,
+        .failed = false,
+    };
+    (void)tokenizer; // not used yet
+
+    printf("hello\n");
+
+    free(text);
+    return EXIT_SUCCESS;
+}