From e36f1e1b8166e9045306b60fd1262aeebed60357 Mon Sep 17 00:00:00 2001
From: sfja <sfja2004@gmail.com>
Date: Thu, 19 Mar 2026 19:46:52 +0100
Subject: [PATCH] debug hashmap

---
 .gitignore          |   1 +
 .vscode/launch.json |  27 ++++++++++
 .vscode/tasks.json  |  12 +++++
 Makefile            |  13 +++--
 src/collections.c   | 122 +++++++++++++++++++++++++++++---------------
 src/collections.h   |  14 +++--
 src/main.c          |  79 +++++++++++++++++++++++++---
 7 files changed, 210 insertions(+), 58 deletions(-)
 create mode 100644 .vscode/launch.json
 create mode 100644 .vscode/tasks.json

diff --git a/.gitignore b/.gitignore
index 567609b..71e0d97 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 build/
+.vscode/settings.json
diff --git a/.vscode/launch.json b/.vscode/launch.json
new file mode 100644
index 0000000..896b5e2
--- /dev/null
+++ b/.vscode/launch.json
@@ -0,0 +1,27 @@
+{
+    // Use IntelliSense to learn about possible attributes.
+    // Hover to view descriptions of existing attributes.
+    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
+    "version": "0.2.0",
+    "configurations": [
+        {
+            "name": "Debug",
+            "type": "cppdbg",
+            "request": "launch",
+            "program": "${workspaceFolder}/build/jq",
+            "args": ["", "data.json"],
+            "stopAtEntry": true,
+            "cwd": "${workspaceFolder}",
+            "environment": [],
+            "MIMode": "gdb",
+            "preLaunchTask": "build",
+            "setupCommands": [
+                {
+                    "description": "Enable pretty-printing for gdb",
+                    "text": "-enable-pretty-printing",
+                    "ignoreFailures": true
+                },
+            ]
+        }
+    ]
+}
\ No newline at end of file
diff --git a/.vscode/tasks.json b/.vscode/tasks.json
new file mode 100644
index 0000000..766d27e
--- /dev/null
+++ b/.vscode/tasks.json
@@ -0,0 +1,12 @@
+{
+    // See https://go.microsoft.com/fwlink/?LinkId=733558
+    // for the documentation about the tasks.json format
+    "version": "2.0.0",
+    "tasks": [
+        {
+            "label": "build",
+            "type": "shell",
+            "command": "make all"
+        }
+    ]
+}
\ No newline at end of file
diff --git a/Makefile b/Makefile
index 34b80eb..995b184 100644
--- a/Makefile
+++ b/Makefile
@@ -1,9 +1,11 @@
 
 MAKEFLAGS += -j16
 
-CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
+CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -g
 LDFLAGS=
 
+# CFLAGS+=-fsanitize=address
+
 build_dir = build
 obj_dir = $(build_dir)/obj
 
@@ -11,11 +13,14 @@ sources = \
 	src/main.c \
 	src/collections.c
 
-target=jq
+target=$(build_dir)/jq
 
-all: $(build_dir)/$(target)
+all: $(target)
 
-$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o)
+debug: $(target)
+	gdb tui -ex 'b main.c:254' -ex 'b collections.c:161' -ex 'r' --args build/jq '.' data.json
+
+$(target): $(sources:%.c=$(obj_dir)/%.o)
 	gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
 
 $(obj_dir)/%.o: %.c
diff --git a/src/collections.c b/src/collections.c
index 0c05912..31dc1b5 100644
--- a/src/collections.c
+++ b/src/collections.c
@@ -1,4 +1,6 @@
 #include "collections.h"
+#include <assert.h>
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 #include <stdlib.h>
@@ -14,7 +16,7 @@ void *array_push(void **data,
         *capacity = 8;
         *data = malloc(*capacity * elem_size);
         *count = 0;
-    } else if (*count + 1 >= *capacity) {
+    } else if (*count + 1 > *capacity) {
         *capacity *= 2;
         *data = realloc(*data, *capacity * elem_size);
     }
@@ -22,6 +24,7 @@ void *array_push(void **data,
     if (elem) {
         memcpy(ptr, elem, elem_size);
     }
+    *count += 1;
     return ptr;
 }
 
@@ -35,6 +38,9 @@ void *array_insert_at(void **data,
     if (idx >= *count) {
         return array_push(data, capacity, count, elem, elem_size);
     }
+    if (*count + 1 > *capacity) {
+        array_push(data, capacity, count, NULL, elem_size);
+    }
     void *src_ptr = &((unsigned char *)*data)[idx * elem_size];
     void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size];
     memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
@@ -64,12 +70,17 @@ void hashmap_construct(struct hashmap *t)
     *t = (struct hashmap) { NULL, 0, 0 };
 }
 
-static size_t const bucket_size = 16;
-
-static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
+void hashmap_destroy(struct hashmap *t)
 {
-    for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) {
-        struct hash_bucket *bucket = &t->buckets[b_idx];
+    if (t->buckets) {
+        free(t->buckets);
+    }
+}
+
+static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash)
+{
+    for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) {
+        struct hash_bucket *bucket = &m->buckets[b_idx];
         if (hash < bucket->first_hash || hash > bucket->last_hash) {
             continue;
         }
@@ -83,70 +94,97 @@ static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
     return NULL;
 }
 
-static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
+static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx)
 {
     struct hash_bucket bucket = {
-        .entries = malloc(sizeof(struct hash_entry) * bucket_size),
+        .entries = { { 0 } },
         .count = 0,
         .first_hash = 0,
         .last_hash = 0,
     };
 
-    return array_insert_at((void **)&t->buckets,
-        &t->buckets_capacity,
-        &t->buckets_count,
+    return array_insert_at((void **)&m->buckets,
+        &m->buckets_capacity,
+        &m->buckets_count,
         idx,
         &bucket,
         sizeof(struct hash_bucket));
 }
 
-static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
+#define MIN(A, B) ((A) <= (B) ? (A) : (B))
+#define MAX(A, B) ((A) >= (B) ? (A) : (B))
+
+static struct hash_entry *add_entry_to_bucket(
+    struct hash_bucket *b, uint64_t hash)
 {
-    for (size_t i = 0; i < t->buckets_count; ++i) {
-        struct hash_bucket *bucket = &t->buckets[i];
-        if (hash < bucket->first_hash) {
-            if (bucket->count < bucket_size) {
-                bucket->count += 1;
-                bucket->first_hash = hash;
-                return &bucket->entries[bucket->count - 1];
-            }
-            bucket = insert_bucket_at(t, i);
-            bucket->first_hash = hash;
-            bucket->last_hash = hash;
-            bucket->entries[0].hash = hash;
-            return &bucket->entries[0];
-        }
-        if (hash <= bucket->last_hash) {
-            if (bucket->count >= bucket_size) {
-                bucket = insert_bucket_at(t, i);
-                bucket->first_hash = hash;
-                bucket->last_hash = hash;
-                bucket->entries[0].hash = hash;
-                return &bucket->entries[0];
-            }
-        }
-    }
+    b->count += 1;
+    b->first_hash = MIN(b->first_hash, hash);
+    b->last_hash = MAX(b->last_hash, hash);
+    return &b->entries[b->count - 1];
 }
 
-void hashmap_set(struct hashmap *t, char const *key, void *value)
+static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash)
+{
+    assert(m->buckets_count >= 1 && m->buckets[0].count != 0);
+
+    if (hash < m->buckets[0].first_hash
+        && m->buckets[0].count < bucket_capacity) {
+
+        return add_entry_to_bucket(&m->buckets[0], hash);
+    }
+    for (size_t i = 0; i < m->buckets_count; ++i) {
+        struct hash_bucket *curr = &m->buckets[i];
+
+        struct hash_bucket *next
+            = i + 1 < m->buckets_count ? &m->buckets[i + 1] : NULL;
+
+        if (next && hash >= next->first_hash)
+            continue;
+
+        if (curr->count < bucket_capacity) {
+            return add_entry_to_bucket(curr, hash);
+        } else if (next && next->count < bucket_capacity) {
+            return add_entry_to_bucket(next, hash);
+        } else {
+            struct hash_bucket *b = insert_bucket_at(m, i + 1);
+            b->first_hash = UINT64_MAX;
+            return add_entry_to_bucket(b, hash);
+        }
+    }
+    assert(false);
+}
+
+void hashmap_set(struct hashmap *m, char const *key, void *value)
 {
     uint64_t hash = hash_key(key);
 
-    if (t->buckets_count == 0) {
-        struct hash_bucket *bucket = insert_bucket_at(t, 0);
+    if (m->buckets_count == 0) {
+        struct hash_bucket *bucket = insert_bucket_at(m, 0);
         bucket->entries[0] = (struct hash_entry) { hash, value };
+        bucket->count += 1;
+        bucket->first_hash = hash;
+        bucket->last_hash = hash;
         return;
     }
 
-    struct hash_entry *entry = find_entry(t, hash);
+    struct hash_entry *entry = find_entry(m, hash);
     if (!entry) {
-        entry = make_entry(t, hash);
+        entry = make_entry(m, hash);
+        entry->hash = hash;
     }
     entry->value = value;
     return;
 }
 
-void *hashmap_get(struct hashmap *t, char const *key)
+bool hashmap_has(struct hashmap *m, char const *key)
 {
     uint64_t hash = hash_key(key);
+    return find_entry(m, hash) != NULL;
+}
+
+void *hashmap_get(struct hashmap *m, char const *key)
+{
+    uint64_t hash = hash_key(key);
+    struct hash_entry *entry = find_entry(m, hash);
+    return entry ? entry->value : NULL;
 }
diff --git a/src/collections.h b/src/collections.h
index 089e325..77d9690 100644
--- a/src/collections.h
+++ b/src/collections.h
@@ -1,6 +1,7 @@
 #ifndef COLLECTIONS_H
 #define COLLECTIONS_H
 
+#include <stdbool.h>
 #include <stddef.h>
 #include <stdint.h>
 
@@ -22,8 +23,10 @@ struct hash_entry {
     void *value;
 };
 
+#define bucket_capacity 16
+
 struct hash_bucket {
-    struct hash_entry *entries;
+    struct hash_entry entries[bucket_capacity];
     size_t count;
     uint64_t first_hash;
     uint64_t last_hash;
@@ -35,9 +38,10 @@ struct hashmap {
     size_t buckets_count;
 };
 
-void hashmap_construct(struct hashmap *t);
-void hashmap_destroy(struct hashmap *t);
-void hashmap_set(struct hashmap *t, char const *key, void *value);
-void *hashmap_get(struct hashmap *t, char const *key);
+void hashmap_construct(struct hashmap *m);
+void hashmap_destroy(struct hashmap *m);
+void hashmap_set(struct hashmap *m, char const *key, void *value);
+bool hashmap_has(struct hashmap *m, char const *key);
+void *hashmap_get(struct hashmap *m, char const *key);
 
 #endif
diff --git a/src/main.c b/src/main.c
index 99bf592..4b36649 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1,3 +1,4 @@
+#include "collections.h"
 #include <assert.h>
 #include <errno.h>
 #include <stdbool.h>
@@ -216,25 +217,89 @@ int main(int argc, char *argv[])
         return EXIT_FAILURE;
     }
 
-    if (fseek(file, 0, SEEK_END) != 0) {
-        fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
-        return EXIT_FAILURE;
-    }
+    fseek(file, 0, SEEK_END);
     long ftell_result = ftell(file);
     if (ftell_result < 0) {
         fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
         return EXIT_FAILURE;
     }
+    fseek(file, 0, SEEK_SET);
     size_t file_size = (size_t)ftell_result;
     char *text = malloc(file_size + 1);
 
     size_t bytes_read = fread(text, 1, file_size, file);
     if (bytes_read != file_size) {
-        fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
+        fprintf(stderr,
+            "error: could not read %ld/%ld (%s)\n",
+            bytes_read,
+            file_size,
+            strerror(errno));
         return EXIT_FAILURE;
     }
+    fclose(file);
 
-    struct tokenizer tokenizer = { text, file_size, 0, 1, 1 };
+    struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false };
 
-    printf("hello\n");
+    free(text);
+
+    char const *keys[] = {
+        "first",
+        "second",
+        "third",
+        "fourth",
+        "fifth",
+        "sixth",
+        "seventh",
+        "eigth",
+        "ninth",
+        "tenth",
+        "first",
+        "seventh",
+        "tenth",
+    };
+    char const *values[] = {
+        "salery",
+        "policy",
+        "strike",
+        "prophecy",
+        "break",
+        "down",
+        "think",
+        "about",
+        "honey",
+        "and",
+        "the",
+        "sweet",
+        "sounds",
+    };
+
+    struct hashmap map;
+    hashmap_construct(&map);
+
+    for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
+        hashmap_set(&map, keys[i], (void *)values[i]);
+    }
+
+    for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
+        assert(hashmap_has(&map, keys[i]));
+    }
+
+    for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
+        switch (i) {
+            case 0:
+                assert(hashmap_get(&map, keys[i]) == values[10]);
+                break;
+            case 6:
+                assert(hashmap_get(&map, keys[i]) == values[11]);
+                break;
+            case 9:
+                assert(hashmap_get(&map, keys[i]) == values[12]);
+                break;
+            default:
+                assert(hashmap_get(&map, keys[i]) == values[i]);
+                break;
+        }
+    }
+
+    hashmap_destroy(&map);
 }