From e36f1e1b8166e9045306b60fd1262aeebed60357 Mon Sep 17 00:00:00 2001 From: sfja Date: Thu, 19 Mar 2026 19:46:52 +0100 Subject: [PATCH] debug hashmap --- .gitignore | 1 + .vscode/launch.json | 27 ++++++++++ .vscode/tasks.json | 12 +++++ Makefile | 13 +++-- src/collections.c | 122 +++++++++++++++++++++++++++++--------------- src/collections.h | 14 +++-- src/main.c | 79 +++++++++++++++++++++++++--- 7 files changed, 210 insertions(+), 58 deletions(-) create mode 100644 .vscode/launch.json create mode 100644 .vscode/tasks.json diff --git a/.gitignore b/.gitignore index 567609b..71e0d97 100644 --- a/.gitignore +++ b/.gitignore @@ -1 +1,2 @@ build/ +.vscode/settings.json diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..896b5e2 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,27 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + "name": "Debug", + "type": "cppdbg", + "request": "launch", + "program": "${workspaceFolder}/build/jq", + "args": ["", "data.json"], + "stopAtEntry": true, + "cwd": "${workspaceFolder}", + "environment": [], + "MIMode": "gdb", + "preLaunchTask": "build", + "setupCommands": [ + { + "description": "Enable pretty-printing for gdb", + "text": "-enable-pretty-printing", + "ignoreFailures": true + }, + ] + } + ] +} \ No newline at end of file diff --git a/.vscode/tasks.json b/.vscode/tasks.json new file mode 100644 index 0000000..766d27e --- /dev/null +++ b/.vscode/tasks.json @@ -0,0 +1,12 @@ +{ + // See https://go.microsoft.com/fwlink/?LinkId=733558 + // for the documentation about the tasks.json format + "version": "2.0.0", + "tasks": [ + { + "label": "build", + "type": "shell", + "command": "make all" + } + ] +} \ No newline at end of file diff --git a/Makefile b/Makefile index 34b80eb..995b184 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,11 @@ MAKEFLAGS += -j16 -CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address +CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -g LDFLAGS= +# CFLAGS+=-fsanitize=address + build_dir = build obj_dir = $(build_dir)/obj @@ -11,11 +13,14 @@ sources = \ src/main.c \ src/collections.c -target=jq +target=$(build_dir)/jq -all: $(build_dir)/$(target) +all: $(target) -$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o) +debug: $(target) + gdb tui -ex 'b main.c:254' -ex 'b collections.c:161' -ex 'r' --args build/jq '.' data.json + +$(target): $(sources:%.c=$(obj_dir)/%.o) gcc -o $@ $(CFLAGS) $(LDFLAGS) $^ $(obj_dir)/%.o: %.c diff --git a/src/collections.c b/src/collections.c index 0c05912..31dc1b5 100644 --- a/src/collections.c +++ b/src/collections.c @@ -1,4 +1,6 @@ #include "collections.h" +#include +#include #include #include #include @@ -14,7 +16,7 @@ void *array_push(void **data, *capacity = 8; *data = malloc(*capacity * elem_size); *count = 0; - } else if (*count + 1 >= *capacity) { + } else if (*count + 1 > *capacity) { *capacity *= 2; *data = realloc(*data, *capacity * elem_size); } @@ -22,6 +24,7 @@ void *array_push(void **data, if (elem) { memcpy(ptr, elem, elem_size); } + *count += 1; return ptr; } @@ -35,6 +38,9 @@ void *array_insert_at(void **data, if (idx >= *count) { return array_push(data, capacity, count, elem, elem_size); } + if (*count + 1 > *capacity) { + array_push(data, capacity, count, NULL, elem_size); + } void *src_ptr = &((unsigned char *)*data)[idx * elem_size]; void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size]; memmove(dest_ptr, src_ptr, (*count - idx) * elem_size); @@ -64,12 +70,17 @@ void hashmap_construct(struct hashmap *t) *t = (struct hashmap) { NULL, 0, 0 }; } -static size_t const bucket_size = 16; - -static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash) +void hashmap_destroy(struct hashmap *t) { - for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) { - struct hash_bucket *bucket = &t->buckets[b_idx]; + if (t->buckets) { + free(t->buckets); + } +} + +static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash) +{ + for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) { + struct hash_bucket *bucket = &m->buckets[b_idx]; if (hash < bucket->first_hash || hash > bucket->last_hash) { continue; } @@ -83,70 +94,97 @@ static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash) return NULL; } -static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx) +static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx) { struct hash_bucket bucket = { - .entries = malloc(sizeof(struct hash_entry) * bucket_size), + .entries = { { 0 } }, .count = 0, .first_hash = 0, .last_hash = 0, }; - return array_insert_at((void **)&t->buckets, - &t->buckets_capacity, - &t->buckets_count, + return array_insert_at((void **)&m->buckets, + &m->buckets_capacity, + &m->buckets_count, idx, &bucket, sizeof(struct hash_bucket)); } -static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash) +#define MIN(A, B) ((A) <= (B) ? (A) : (B)) +#define MAX(A, B) ((A) >= (B) ? (A) : (B)) + +static struct hash_entry *add_entry_to_bucket( + struct hash_bucket *b, uint64_t hash) { - for (size_t i = 0; i < t->buckets_count; ++i) { - struct hash_bucket *bucket = &t->buckets[i]; - if (hash < bucket->first_hash) { - if (bucket->count < bucket_size) { - bucket->count += 1; - bucket->first_hash = hash; - return &bucket->entries[bucket->count - 1]; - } - bucket = insert_bucket_at(t, i); - bucket->first_hash = hash; - bucket->last_hash = hash; - bucket->entries[0].hash = hash; - return &bucket->entries[0]; - } - if (hash <= bucket->last_hash) { - if (bucket->count >= bucket_size) { - bucket = insert_bucket_at(t, i); - bucket->first_hash = hash; - bucket->last_hash = hash; - bucket->entries[0].hash = hash; - return &bucket->entries[0]; - } - } - } + b->count += 1; + b->first_hash = MIN(b->first_hash, hash); + b->last_hash = MAX(b->last_hash, hash); + return &b->entries[b->count - 1]; } -void hashmap_set(struct hashmap *t, char const *key, void *value) +static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash) +{ + assert(m->buckets_count >= 1 && m->buckets[0].count != 0); + + if (hash < m->buckets[0].first_hash + && m->buckets[0].count < bucket_capacity) { + + return add_entry_to_bucket(&m->buckets[0], hash); + } + for (size_t i = 0; i < m->buckets_count; ++i) { + struct hash_bucket *curr = &m->buckets[i]; + + struct hash_bucket *next + = i + 1 < m->buckets_count ? &m->buckets[i + 1] : NULL; + + if (next && hash >= next->first_hash) + continue; + + if (curr->count < bucket_capacity) { + return add_entry_to_bucket(curr, hash); + } else if (next && next->count < bucket_capacity) { + return add_entry_to_bucket(next, hash); + } else { + struct hash_bucket *b = insert_bucket_at(m, i + 1); + b->first_hash = UINT64_MAX; + return add_entry_to_bucket(b, hash); + } + } + assert(false); +} + +void hashmap_set(struct hashmap *m, char const *key, void *value) { uint64_t hash = hash_key(key); - if (t->buckets_count == 0) { - struct hash_bucket *bucket = insert_bucket_at(t, 0); + if (m->buckets_count == 0) { + struct hash_bucket *bucket = insert_bucket_at(m, 0); bucket->entries[0] = (struct hash_entry) { hash, value }; + bucket->count += 1; + bucket->first_hash = hash; + bucket->last_hash = hash; return; } - struct hash_entry *entry = find_entry(t, hash); + struct hash_entry *entry = find_entry(m, hash); if (!entry) { - entry = make_entry(t, hash); + entry = make_entry(m, hash); + entry->hash = hash; } entry->value = value; return; } -void *hashmap_get(struct hashmap *t, char const *key) +bool hashmap_has(struct hashmap *m, char const *key) { uint64_t hash = hash_key(key); + return find_entry(m, hash) != NULL; +} + +void *hashmap_get(struct hashmap *m, char const *key) +{ + uint64_t hash = hash_key(key); + struct hash_entry *entry = find_entry(m, hash); + return entry ? entry->value : NULL; } diff --git a/src/collections.h b/src/collections.h index 089e325..77d9690 100644 --- a/src/collections.h +++ b/src/collections.h @@ -1,6 +1,7 @@ #ifndef COLLECTIONS_H #define COLLECTIONS_H +#include #include #include @@ -22,8 +23,10 @@ struct hash_entry { void *value; }; +#define bucket_capacity 16 + struct hash_bucket { - struct hash_entry *entries; + struct hash_entry entries[bucket_capacity]; size_t count; uint64_t first_hash; uint64_t last_hash; @@ -35,9 +38,10 @@ struct hashmap { size_t buckets_count; }; -void hashmap_construct(struct hashmap *t); -void hashmap_destroy(struct hashmap *t); -void hashmap_set(struct hashmap *t, char const *key, void *value); -void *hashmap_get(struct hashmap *t, char const *key); +void hashmap_construct(struct hashmap *m); +void hashmap_destroy(struct hashmap *m); +void hashmap_set(struct hashmap *m, char const *key, void *value); +bool hashmap_has(struct hashmap *m, char const *key); +void *hashmap_get(struct hashmap *m, char const *key); #endif diff --git a/src/main.c b/src/main.c index 99bf592..4b36649 100644 --- a/src/main.c +++ b/src/main.c @@ -1,3 +1,4 @@ +#include "collections.h" #include #include #include @@ -216,25 +217,89 @@ int main(int argc, char *argv[]) return EXIT_FAILURE; } - if (fseek(file, 0, SEEK_END) != 0) { - fprintf(stderr, "error: could not seek (%s)\n", strerror(errno)); - return EXIT_FAILURE; - } + fseek(file, 0, SEEK_END); long ftell_result = ftell(file); if (ftell_result < 0) { fprintf(stderr, "error: could not tell (%s)\n", strerror(errno)); return EXIT_FAILURE; } + fseek(file, 0, SEEK_SET); size_t file_size = (size_t)ftell_result; char *text = malloc(file_size + 1); size_t bytes_read = fread(text, 1, file_size, file); if (bytes_read != file_size) { - fprintf(stderr, "error: could not read (%s)\n", strerror(errno)); + fprintf(stderr, + "error: could not read %ld/%ld (%s)\n", + bytes_read, + file_size, + strerror(errno)); return EXIT_FAILURE; } + fclose(file); - struct tokenizer tokenizer = { text, file_size, 0, 1, 1 }; + struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false }; - printf("hello\n"); + free(text); + + char const *keys[] = { + "first", + "second", + "third", + "fourth", + "fifth", + "sixth", + "seventh", + "eigth", + "ninth", + "tenth", + "first", + "seventh", + "tenth", + }; + char const *values[] = { + "salery", + "policy", + "strike", + "prophecy", + "break", + "down", + "think", + "about", + "honey", + "and", + "the", + "sweet", + "sounds", + }; + + struct hashmap map; + hashmap_construct(&map); + + for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { + hashmap_set(&map, keys[i], (void *)values[i]); + } + + for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { + assert(hashmap_has(&map, keys[i])); + } + + for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) { + switch (i) { + case 0: + assert(hashmap_get(&map, keys[i]) == values[10]); + break; + case 6: + assert(hashmap_get(&map, keys[i]) == values[11]); + break; + case 9: + assert(hashmap_get(&map, keys[i]) == values[12]); + break; + default: + assert(hashmap_get(&map, keys[i]) == values[i]); + break; + } + } + + hashmap_destroy(&map); }