debug hashmap

This commit is contained in:
sfja 2026-03-19 19:46:52 +01:00
parent 24be8185c6
commit e36f1e1b81
7 changed files with 210 additions and 58 deletions

1
.gitignore vendored
View File

@ -1 +1,2 @@
build/ build/
.vscode/settings.json

27
.vscode/launch.json vendored Normal file
View File

@ -0,0 +1,27 @@
{
    // Use IntelliSense to learn about possible attributes.
    // Hover to view descriptions of existing attributes.
    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
    "version": "0.2.0",
    "configurations": [
        {
            // Launches the built binary under gdb after running the
            // "build" task, stopping at program entry.
            "name": "Debug",
            "type": "cppdbg",
            "request": "launch",
            "program": "${workspaceFolder}/build/jq",
            "args": ["", "data.json"],
            "stopAtEntry": true,
            "cwd": "${workspaceFolder}",
            "environment": [],
            "MIMode": "gdb",
            "preLaunchTask": "build",
            "setupCommands": [
                {
                    "description": "Enable pretty-printing for gdb",
                    "text": "-enable-pretty-printing",
                    "ignoreFailures": true
                }
            ]
        }
    ]
}

12
.vscode/tasks.json vendored Normal file
View File

@ -0,0 +1,12 @@
{
// See https://go.microsoft.com/fwlink/?LinkId=733558
// for the documentation about the tasks.json format
"version": "2.0.0",
"tasks": [
{
// The label is the name the debug launch configuration's
// "preLaunchTask" refers to.
"label": "build",
"type": "shell",
// Runs the Makefile's `all` target through the shell.
"command": "make all"
}
]
}

View File

@ -1,9 +1,11 @@
MAKEFLAGS += -j16 MAKEFLAGS += -j16
CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -g
LDFLAGS= LDFLAGS=
# CFLAGS+=-fsanitize=address
build_dir = build build_dir = build
obj_dir = $(build_dir)/obj obj_dir = $(build_dir)/obj
@ -11,11 +13,14 @@ sources = \
src/main.c \ src/main.c \
src/collections.c src/collections.c
target=jq target=$(build_dir)/jq
all: $(build_dir)/$(target) all: $(target)
# Build the binary, then start it under gdb's text UI with breakpoints
# preset and the program already running on the sample input.
debug: $(target)
	gdb -tui -ex 'b main.c:254' -ex 'b collections.c:161' -ex 'r' --args $(target) '.' data.json
$(target): $(sources:%.c=$(obj_dir)/%.o)
gcc -o $@ $(CFLAGS) $(LDFLAGS) $^ gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
$(obj_dir)/%.o: %.c $(obj_dir)/%.o: %.c

View File

@ -1,4 +1,6 @@
#include "collections.h" #include "collections.h"
#include <assert.h>
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
#include <stdlib.h> #include <stdlib.h>
@ -14,7 +16,7 @@ void *array_push(void **data,
*capacity = 8; *capacity = 8;
*data = malloc(*capacity * elem_size); *data = malloc(*capacity * elem_size);
*count = 0; *count = 0;
} else if (*count + 1 >= *capacity) { } else if (*count + 1 > *capacity) {
*capacity *= 2; *capacity *= 2;
*data = realloc(*data, *capacity * elem_size); *data = realloc(*data, *capacity * elem_size);
} }
@ -22,6 +24,7 @@ void *array_push(void **data,
if (elem) { if (elem) {
memcpy(ptr, elem, elem_size); memcpy(ptr, elem, elem_size);
} }
*count += 1;
return ptr; return ptr;
} }
@ -35,6 +38,9 @@ void *array_insert_at(void **data,
if (idx >= *count) { if (idx >= *count) {
return array_push(data, capacity, count, elem, elem_size); return array_push(data, capacity, count, elem, elem_size);
} }
if (*count + 1 > *capacity) {
array_push(data, capacity, count, NULL, elem_size);
}
void *src_ptr = &((unsigned char *)*data)[idx * elem_size]; void *src_ptr = &((unsigned char *)*data)[idx * elem_size];
void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size]; void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size];
memmove(dest_ptr, src_ptr, (*count - idx) * elem_size); memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
@ -64,12 +70,17 @@ void hashmap_construct(struct hashmap *t)
*t = (struct hashmap) { NULL, 0, 0 }; *t = (struct hashmap) { NULL, 0, 0 };
} }
static size_t const bucket_size = 16; void hashmap_destroy(struct hashmap *t)
static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
{ {
for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) { if (t->buckets) {
struct hash_bucket *bucket = &t->buckets[b_idx]; free(t->buckets);
}
}
static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash)
{
for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) {
struct hash_bucket *bucket = &m->buckets[b_idx];
if (hash < bucket->first_hash || hash > bucket->last_hash) { if (hash < bucket->first_hash || hash > bucket->last_hash) {
continue; continue;
} }
@ -83,70 +94,97 @@ static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
return NULL; return NULL;
} }
static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx) static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx)
{ {
struct hash_bucket bucket = { struct hash_bucket bucket = {
.entries = malloc(sizeof(struct hash_entry) * bucket_size), .entries = { { 0 } },
.count = 0, .count = 0,
.first_hash = 0, .first_hash = 0,
.last_hash = 0, .last_hash = 0,
}; };
return array_insert_at((void **)&t->buckets, return array_insert_at((void **)&m->buckets,
&t->buckets_capacity, &m->buckets_capacity,
&t->buckets_count, &m->buckets_count,
idx, idx,
&bucket, &bucket,
sizeof(struct hash_bucket)); sizeof(struct hash_bucket));
} }
static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash) #define MIN(A, B) ((A) <= (B) ? (A) : (B))
#define MAX(A, B) ((A) >= (B) ? (A) : (B))
static struct hash_entry *add_entry_to_bucket(
struct hash_bucket *b, uint64_t hash)
{ {
for (size_t i = 0; i < t->buckets_count; ++i) { b->count += 1;
struct hash_bucket *bucket = &t->buckets[i]; b->first_hash = MIN(b->first_hash, hash);
if (hash < bucket->first_hash) { b->last_hash = MAX(b->last_hash, hash);
if (bucket->count < bucket_size) { return &b->entries[b->count - 1];
bucket->count += 1;
bucket->first_hash = hash;
return &bucket->entries[bucket->count - 1];
}
bucket = insert_bucket_at(t, i);
bucket->first_hash = hash;
bucket->last_hash = hash;
bucket->entries[0].hash = hash;
return &bucket->entries[0];
}
if (hash <= bucket->last_hash) {
if (bucket->count >= bucket_size) {
bucket = insert_bucket_at(t, i);
bucket->first_hash = hash;
bucket->last_hash = hash;
bucket->entries[0].hash = hash;
return &bucket->entries[0];
}
}
}
} }
void hashmap_set(struct hashmap *t, char const *key, void *value) static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash)
{
assert(m->buckets_count >= 1 && m->buckets[0].count != 0);
if (hash < m->buckets[0].first_hash
&& m->buckets[0].count < bucket_capacity) {
return add_entry_to_bucket(&m->buckets[0], hash);
}
for (size_t i = 0; i < m->buckets_count; ++i) {
struct hash_bucket *curr = &m->buckets[i];
struct hash_bucket *next
= i + 1 < m->buckets_count ? &m->buckets[i + 1] : NULL;
if (next && hash >= next->first_hash)
continue;
if (curr->count < bucket_capacity) {
return add_entry_to_bucket(curr, hash);
} else if (next && next->count < bucket_capacity) {
return add_entry_to_bucket(next, hash);
} else {
struct hash_bucket *b = insert_bucket_at(m, i + 1);
b->first_hash = UINT64_MAX;
return add_entry_to_bucket(b, hash);
}
}
assert(false);
}
/**
 * Associate `value` with `key`, replacing any value already stored for it.
 *
 * The key is reduced to hash_key(key); lookups and storage in this function
 * work on that hash only.
 *
 * @param m      Map to modify.
 * @param key    Key string to hash.
 * @param value  Pointer stored as-is in the entry.
 */
void hashmap_set(struct hashmap *m, char const *key, void *value)
{
    uint64_t const hash = hash_key(key);

    if (m->buckets_count != 0) {
        /* Reuse the existing entry for this hash, or reserve a new one. */
        struct hash_entry *slot = find_entry(m, hash);
        if (slot == NULL) {
            slot = make_entry(m, hash);
            slot->hash = hash;
        }
        slot->value = value;
        return;
    }

    /* Very first insertion: create the initial bucket and seed its range. */
    struct hash_bucket *first = insert_bucket_at(m, 0);
    first->entries[0] = (struct hash_entry) { hash, value };
    first->count += 1;
    first->first_hash = hash;
    first->last_hash = hash;
}
/**
 * Report whether the map holds an entry for `key`.
 *
 * @param m    Map to search.
 * @param key  Key string; it is hashed with hash_key() before the lookup.
 * @return     true if find_entry() locates an entry for the hash.
 */
bool hashmap_has(struct hashmap *m, char const *key)
{
    struct hash_entry const *hit = find_entry(m, hash_key(key));
    return hit != NULL;
}
/**
 * Look up the value stored for `key`.
 *
 * @param m    Map to search.
 * @param key  Key string; it is hashed with hash_key() before the lookup.
 * @return     The stored value pointer, or NULL when no entry is found.
 *             A stored NULL value is indistinguishable from "not found";
 *             use hashmap_has() to tell them apart.
 */
void *hashmap_get(struct hashmap *m, char const *key)
{
    struct hash_entry *hit = find_entry(m, hash_key(key));
    if (hit == NULL) {
        return NULL;
    }
    return hit->value;
}

View File

@ -1,6 +1,7 @@
#ifndef COLLECTIONS_H #ifndef COLLECTIONS_H
#define COLLECTIONS_H #define COLLECTIONS_H
#include <stdbool.h>
#include <stddef.h> #include <stddef.h>
#include <stdint.h> #include <stdint.h>
@ -22,8 +23,10 @@ struct hash_entry {
void *value; void *value;
}; };
#define bucket_capacity 16
struct hash_bucket { struct hash_bucket {
struct hash_entry *entries; struct hash_entry entries[bucket_capacity];
size_t count; size_t count;
uint64_t first_hash; uint64_t first_hash;
uint64_t last_hash; uint64_t last_hash;
@ -35,9 +38,10 @@ struct hashmap {
size_t buckets_count; size_t buckets_count;
}; };
void hashmap_construct(struct hashmap *t); void hashmap_construct(struct hashmap *m);
void hashmap_destroy(struct hashmap *t); void hashmap_destroy(struct hashmap *m);
void hashmap_set(struct hashmap *t, char const *key, void *value); void hashmap_set(struct hashmap *m, char const *key, void *value);
void *hashmap_get(struct hashmap *t, char const *key); bool hashmap_has(struct hashmap *m, char const *key);
void *hashmap_get(struct hashmap *m, char const *key);
#endif #endif

View File

@ -1,3 +1,4 @@
#include "collections.h"
#include <assert.h> #include <assert.h>
#include <errno.h> #include <errno.h>
#include <stdbool.h> #include <stdbool.h>
@ -216,25 +217,89 @@ int main(int argc, char *argv[])
return EXIT_FAILURE; return EXIT_FAILURE;
} }
if (fseek(file, 0, SEEK_END) != 0) { fseek(file, 0, SEEK_END);
fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
return EXIT_FAILURE;
}
long ftell_result = ftell(file); long ftell_result = ftell(file);
if (ftell_result < 0) { if (ftell_result < 0) {
fprintf(stderr, "error: could not tell (%s)\n", strerror(errno)); fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
return EXIT_FAILURE; return EXIT_FAILURE;
} }
fseek(file, 0, SEEK_SET);
size_t file_size = (size_t)ftell_result; size_t file_size = (size_t)ftell_result;
char *text = malloc(file_size + 1); char *text = malloc(file_size + 1);
size_t bytes_read = fread(text, 1, file_size, file); size_t bytes_read = fread(text, 1, file_size, file);
if (bytes_read != file_size) { if (bytes_read != file_size) {
fprintf(stderr, "error: could not read (%s)\n", strerror(errno)); fprintf(stderr,
"error: could not read %ld/%ld (%s)\n",
bytes_read,
file_size,
strerror(errno));
return EXIT_FAILURE; return EXIT_FAILURE;
} }
fclose(file);
struct tokenizer tokenizer = { text, file_size, 0, 1, 1 }; struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false };
printf("hello\n"); free(text);
char const *keys[] = {
"first",
"second",
"third",
"fourth",
"fifth",
"sixth",
"seventh",
"eigth",
"ninth",
"tenth",
"first",
"seventh",
"tenth",
};
char const *values[] = {
"salery",
"policy",
"strike",
"prophecy",
"break",
"down",
"think",
"about",
"honey",
"and",
"the",
"sweet",
"sounds",
};
struct hashmap map;
hashmap_construct(&map);
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
hashmap_set(&map, keys[i], (void *)values[i]);
}
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
assert(hashmap_has(&map, keys[i]));
}
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
switch (i) {
case 0:
assert(hashmap_get(&map, keys[i]) == values[10]);
break;
case 6:
assert(hashmap_get(&map, keys[i]) == values[11]);
break;
case 9:
assert(hashmap_get(&map, keys[i]) == values[12]);
break;
default:
assert(hashmap_get(&map, keys[i]) == values[i]);
break;
}
}
hashmap_destroy(&map);
} }