debug hashmap
This commit is contained in:
parent
24be8185c6
commit
e36f1e1b81
1
.gitignore
vendored
1
.gitignore
vendored
@ -1 +1,2 @@
|
|||||||
build/
|
build/
|
||||||
|
.vscode/settings.json
|
||||||
|
|||||||
27
.vscode/launch.json
vendored
Normal file
27
.vscode/launch.json
vendored
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
{
|
||||||
|
// Use IntelliSense to learn about possible attributes.
|
||||||
|
// Hover to view descriptions of existing attributes.
|
||||||
|
// For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
|
||||||
|
"version": "0.2.0",
|
||||||
|
"configurations": [
|
||||||
|
{
|
||||||
|
"name": "Debug",
|
||||||
|
"type": "cppdbg",
|
||||||
|
"request": "launch",
|
||||||
|
"program": "${workspaceFolder}/build/jq",
|
||||||
|
"args": ["", "data.json"],
|
||||||
|
"stopAtEntry": true,
|
||||||
|
"cwd": "${workspaceFolder}",
|
||||||
|
"environment": [],
|
||||||
|
"MIMode": "gdb",
|
||||||
|
"preLaunchTask": "build",
|
||||||
|
"setupCommands": [
|
||||||
|
{
|
||||||
|
"description": "Enable pretty-printing for gdb",
|
||||||
|
"text": "-enable-pretty-printing",
|
||||||
|
"ignoreFailures": true
|
||||||
|
},
|
||||||
|
]
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
12
.vscode/tasks.json
vendored
Normal file
12
.vscode/tasks.json
vendored
Normal file
@ -0,0 +1,12 @@
|
|||||||
|
{
|
||||||
|
// See https://go.microsoft.com/fwlink/?LinkId=733558
|
||||||
|
// for the documentation about the tasks.json format
|
||||||
|
"version": "2.0.0",
|
||||||
|
"tasks": [
|
||||||
|
{
|
||||||
|
"label": "build",
|
||||||
|
"type": "shell",
|
||||||
|
"command": "make all"
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
13
Makefile
13
Makefile
@ -1,9 +1,11 @@
|
|||||||
|
|
||||||
MAKEFLAGS += -j16
|
MAKEFLAGS += -j16
|
||||||
|
|
||||||
CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
|
CFLAGS=-std=c17 -pedantic-errors -Wall -Wextra -Wconversion -g
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
|
|
||||||
|
# CFLAGS+=-fsanitize=address
|
||||||
|
|
||||||
build_dir = build
|
build_dir = build
|
||||||
obj_dir = $(build_dir)/obj
|
obj_dir = $(build_dir)/obj
|
||||||
|
|
||||||
@ -11,11 +13,14 @@ sources = \
|
|||||||
src/main.c \
|
src/main.c \
|
||||||
src/collections.c
|
src/collections.c
|
||||||
|
|
||||||
target=jq
|
target=$(build_dir)/jq
|
||||||
|
|
||||||
all: $(build_dir)/$(target)
|
all: $(target)
|
||||||
|
|
||||||
$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o)
|
debug: $(target)
|
||||||
|
gdb tui -ex 'b main.c:254' -ex 'b collections.c:161' -ex 'r' --args build/jq '.' data.json
|
||||||
|
|
||||||
|
$(target): $(sources:%.c=$(obj_dir)/%.o)
|
||||||
gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
|
gcc -o $@ $(CFLAGS) $(LDFLAGS) $^
|
||||||
|
|
||||||
$(obj_dir)/%.o: %.c
|
$(obj_dir)/%.o: %.c
|
||||||
|
|||||||
@ -1,4 +1,6 @@
|
|||||||
#include "collections.h"
|
#include "collections.h"
|
||||||
|
#include <assert.h>
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
@ -14,7 +16,7 @@ void *array_push(void **data,
|
|||||||
*capacity = 8;
|
*capacity = 8;
|
||||||
*data = malloc(*capacity * elem_size);
|
*data = malloc(*capacity * elem_size);
|
||||||
*count = 0;
|
*count = 0;
|
||||||
} else if (*count + 1 >= *capacity) {
|
} else if (*count + 1 > *capacity) {
|
||||||
*capacity *= 2;
|
*capacity *= 2;
|
||||||
*data = realloc(*data, *capacity * elem_size);
|
*data = realloc(*data, *capacity * elem_size);
|
||||||
}
|
}
|
||||||
@ -22,6 +24,7 @@ void *array_push(void **data,
|
|||||||
if (elem) {
|
if (elem) {
|
||||||
memcpy(ptr, elem, elem_size);
|
memcpy(ptr, elem, elem_size);
|
||||||
}
|
}
|
||||||
|
*count += 1;
|
||||||
return ptr;
|
return ptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -35,6 +38,9 @@ void *array_insert_at(void **data,
|
|||||||
if (idx >= *count) {
|
if (idx >= *count) {
|
||||||
return array_push(data, capacity, count, elem, elem_size);
|
return array_push(data, capacity, count, elem, elem_size);
|
||||||
}
|
}
|
||||||
|
if (*count + 1 > *capacity) {
|
||||||
|
array_push(data, capacity, count, NULL, elem_size);
|
||||||
|
}
|
||||||
void *src_ptr = &((unsigned char *)*data)[idx * elem_size];
|
void *src_ptr = &((unsigned char *)*data)[idx * elem_size];
|
||||||
void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size];
|
void *dest_ptr = &((unsigned char *)*data)[(idx + 1) * elem_size];
|
||||||
memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
|
memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
|
||||||
@ -64,12 +70,17 @@ void hashmap_construct(struct hashmap *t)
|
|||||||
*t = (struct hashmap) { NULL, 0, 0 };
|
*t = (struct hashmap) { NULL, 0, 0 };
|
||||||
}
|
}
|
||||||
|
|
||||||
static size_t const bucket_size = 16;
|
void hashmap_destroy(struct hashmap *t)
|
||||||
|
|
||||||
static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
|
|
||||||
{
|
{
|
||||||
for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) {
|
if (t->buckets) {
|
||||||
struct hash_bucket *bucket = &t->buckets[b_idx];
|
free(t->buckets);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
static struct hash_entry *find_entry(struct hashmap *m, uint64_t hash)
|
||||||
|
{
|
||||||
|
for (size_t b_idx = 0; b_idx < m->buckets_count; ++b_idx) {
|
||||||
|
struct hash_bucket *bucket = &m->buckets[b_idx];
|
||||||
if (hash < bucket->first_hash || hash > bucket->last_hash) {
|
if (hash < bucket->first_hash || hash > bucket->last_hash) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -83,70 +94,97 @@ static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
|
static struct hash_bucket *insert_bucket_at(struct hashmap *m, size_t idx)
|
||||||
{
|
{
|
||||||
struct hash_bucket bucket = {
|
struct hash_bucket bucket = {
|
||||||
.entries = malloc(sizeof(struct hash_entry) * bucket_size),
|
.entries = { { 0 } },
|
||||||
.count = 0,
|
.count = 0,
|
||||||
.first_hash = 0,
|
.first_hash = 0,
|
||||||
.last_hash = 0,
|
.last_hash = 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
return array_insert_at((void **)&t->buckets,
|
return array_insert_at((void **)&m->buckets,
|
||||||
&t->buckets_capacity,
|
&m->buckets_capacity,
|
||||||
&t->buckets_count,
|
&m->buckets_count,
|
||||||
idx,
|
idx,
|
||||||
&bucket,
|
&bucket,
|
||||||
sizeof(struct hash_bucket));
|
sizeof(struct hash_bucket));
|
||||||
}
|
}
|
||||||
|
|
||||||
static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
|
#define MIN(A, B) ((A) <= (B) ? (A) : (B))
|
||||||
|
#define MAX(A, B) ((A) >= (B) ? (A) : (B))
|
||||||
|
|
||||||
|
static struct hash_entry *add_entry_to_bucket(
|
||||||
|
struct hash_bucket *b, uint64_t hash)
|
||||||
{
|
{
|
||||||
for (size_t i = 0; i < t->buckets_count; ++i) {
|
b->count += 1;
|
||||||
struct hash_bucket *bucket = &t->buckets[i];
|
b->first_hash = MIN(b->first_hash, hash);
|
||||||
if (hash < bucket->first_hash) {
|
b->last_hash = MAX(b->last_hash, hash);
|
||||||
if (bucket->count < bucket_size) {
|
return &b->entries[b->count - 1];
|
||||||
bucket->count += 1;
|
|
||||||
bucket->first_hash = hash;
|
|
||||||
return &bucket->entries[bucket->count - 1];
|
|
||||||
}
|
|
||||||
bucket = insert_bucket_at(t, i);
|
|
||||||
bucket->first_hash = hash;
|
|
||||||
bucket->last_hash = hash;
|
|
||||||
bucket->entries[0].hash = hash;
|
|
||||||
return &bucket->entries[0];
|
|
||||||
}
|
|
||||||
if (hash <= bucket->last_hash) {
|
|
||||||
if (bucket->count >= bucket_size) {
|
|
||||||
bucket = insert_bucket_at(t, i);
|
|
||||||
bucket->first_hash = hash;
|
|
||||||
bucket->last_hash = hash;
|
|
||||||
bucket->entries[0].hash = hash;
|
|
||||||
return &bucket->entries[0];
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
void hashmap_set(struct hashmap *t, char const *key, void *value)
|
static struct hash_entry *make_entry(struct hashmap *m, uint64_t hash)
|
||||||
|
{
|
||||||
|
assert(m->buckets_count >= 1 && m->buckets[0].count != 0);
|
||||||
|
|
||||||
|
if (hash < m->buckets[0].first_hash
|
||||||
|
&& m->buckets[0].count < bucket_capacity) {
|
||||||
|
|
||||||
|
return add_entry_to_bucket(&m->buckets[0], hash);
|
||||||
|
}
|
||||||
|
for (size_t i = 0; i < m->buckets_count; ++i) {
|
||||||
|
struct hash_bucket *curr = &m->buckets[i];
|
||||||
|
|
||||||
|
struct hash_bucket *next
|
||||||
|
= i + 1 < m->buckets_count ? &m->buckets[i + 1] : NULL;
|
||||||
|
|
||||||
|
if (next && hash >= next->first_hash)
|
||||||
|
continue;
|
||||||
|
|
||||||
|
if (curr->count < bucket_capacity) {
|
||||||
|
return add_entry_to_bucket(curr, hash);
|
||||||
|
} else if (next && next->count < bucket_capacity) {
|
||||||
|
return add_entry_to_bucket(next, hash);
|
||||||
|
} else {
|
||||||
|
struct hash_bucket *b = insert_bucket_at(m, i + 1);
|
||||||
|
b->first_hash = UINT64_MAX;
|
||||||
|
return add_entry_to_bucket(b, hash);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert(false);
|
||||||
|
}
|
||||||
|
|
||||||
|
void hashmap_set(struct hashmap *m, char const *key, void *value)
|
||||||
{
|
{
|
||||||
uint64_t hash = hash_key(key);
|
uint64_t hash = hash_key(key);
|
||||||
|
|
||||||
if (t->buckets_count == 0) {
|
if (m->buckets_count == 0) {
|
||||||
struct hash_bucket *bucket = insert_bucket_at(t, 0);
|
struct hash_bucket *bucket = insert_bucket_at(m, 0);
|
||||||
bucket->entries[0] = (struct hash_entry) { hash, value };
|
bucket->entries[0] = (struct hash_entry) { hash, value };
|
||||||
|
bucket->count += 1;
|
||||||
|
bucket->first_hash = hash;
|
||||||
|
bucket->last_hash = hash;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
struct hash_entry *entry = find_entry(t, hash);
|
struct hash_entry *entry = find_entry(m, hash);
|
||||||
if (!entry) {
|
if (!entry) {
|
||||||
entry = make_entry(t, hash);
|
entry = make_entry(m, hash);
|
||||||
|
entry->hash = hash;
|
||||||
}
|
}
|
||||||
entry->value = value;
|
entry->value = value;
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
void *hashmap_get(struct hashmap *t, char const *key)
|
bool hashmap_has(struct hashmap *m, char const *key)
|
||||||
{
|
{
|
||||||
uint64_t hash = hash_key(key);
|
uint64_t hash = hash_key(key);
|
||||||
|
return find_entry(m, hash) != NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
void *hashmap_get(struct hashmap *m, char const *key)
|
||||||
|
{
|
||||||
|
uint64_t hash = hash_key(key);
|
||||||
|
struct hash_entry *entry = find_entry(m, hash);
|
||||||
|
return entry ? entry->value : NULL;
|
||||||
}
|
}
|
||||||
|
|||||||
@ -1,6 +1,7 @@
|
|||||||
#ifndef COLLECTIONS_H
|
#ifndef COLLECTIONS_H
|
||||||
#define COLLECTIONS_H
|
#define COLLECTIONS_H
|
||||||
|
|
||||||
|
#include <stdbool.h>
|
||||||
#include <stddef.h>
|
#include <stddef.h>
|
||||||
#include <stdint.h>
|
#include <stdint.h>
|
||||||
|
|
||||||
@ -22,8 +23,10 @@ struct hash_entry {
|
|||||||
void *value;
|
void *value;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
#define bucket_capacity 16
|
||||||
|
|
||||||
struct hash_bucket {
|
struct hash_bucket {
|
||||||
struct hash_entry *entries;
|
struct hash_entry entries[bucket_capacity];
|
||||||
size_t count;
|
size_t count;
|
||||||
uint64_t first_hash;
|
uint64_t first_hash;
|
||||||
uint64_t last_hash;
|
uint64_t last_hash;
|
||||||
@ -35,9 +38,10 @@ struct hashmap {
|
|||||||
size_t buckets_count;
|
size_t buckets_count;
|
||||||
};
|
};
|
||||||
|
|
||||||
void hashmap_construct(struct hashmap *t);
|
void hashmap_construct(struct hashmap *m);
|
||||||
void hashmap_destroy(struct hashmap *t);
|
void hashmap_destroy(struct hashmap *m);
|
||||||
void hashmap_set(struct hashmap *t, char const *key, void *value);
|
void hashmap_set(struct hashmap *m, char const *key, void *value);
|
||||||
void *hashmap_get(struct hashmap *t, char const *key);
|
bool hashmap_has(struct hashmap *m, char const *key);
|
||||||
|
void *hashmap_get(struct hashmap *m, char const *key);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
79
src/main.c
79
src/main.c
@ -1,3 +1,4 @@
|
|||||||
|
#include "collections.h"
|
||||||
#include <assert.h>
|
#include <assert.h>
|
||||||
#include <errno.h>
|
#include <errno.h>
|
||||||
#include <stdbool.h>
|
#include <stdbool.h>
|
||||||
@ -216,25 +217,89 @@ int main(int argc, char *argv[])
|
|||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (fseek(file, 0, SEEK_END) != 0) {
|
fseek(file, 0, SEEK_END);
|
||||||
fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
|
|
||||||
return EXIT_FAILURE;
|
|
||||||
}
|
|
||||||
long ftell_result = ftell(file);
|
long ftell_result = ftell(file);
|
||||||
if (ftell_result < 0) {
|
if (ftell_result < 0) {
|
||||||
fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
|
fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
fseek(file, 0, SEEK_SET);
|
||||||
size_t file_size = (size_t)ftell_result;
|
size_t file_size = (size_t)ftell_result;
|
||||||
char *text = malloc(file_size + 1);
|
char *text = malloc(file_size + 1);
|
||||||
|
|
||||||
size_t bytes_read = fread(text, 1, file_size, file);
|
size_t bytes_read = fread(text, 1, file_size, file);
|
||||||
if (bytes_read != file_size) {
|
if (bytes_read != file_size) {
|
||||||
fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
|
fprintf(stderr,
|
||||||
|
"error: could not read %ld/%ld (%s)\n",
|
||||||
|
bytes_read,
|
||||||
|
file_size,
|
||||||
|
strerror(errno));
|
||||||
return EXIT_FAILURE;
|
return EXIT_FAILURE;
|
||||||
}
|
}
|
||||||
|
fclose(file);
|
||||||
|
|
||||||
struct tokenizer tokenizer = { text, file_size, 0, 1, 1 };
|
struct tokenizer tokenizer = { text, file_size, 0, 1, 1, false };
|
||||||
|
|
||||||
printf("hello\n");
|
free(text);
|
||||||
|
|
||||||
|
char const *keys[] = {
|
||||||
|
"first",
|
||||||
|
"second",
|
||||||
|
"third",
|
||||||
|
"fourth",
|
||||||
|
"fifth",
|
||||||
|
"sixth",
|
||||||
|
"seventh",
|
||||||
|
"eigth",
|
||||||
|
"ninth",
|
||||||
|
"tenth",
|
||||||
|
"first",
|
||||||
|
"seventh",
|
||||||
|
"tenth",
|
||||||
|
};
|
||||||
|
char const *values[] = {
|
||||||
|
"salery",
|
||||||
|
"policy",
|
||||||
|
"strike",
|
||||||
|
"prophecy",
|
||||||
|
"break",
|
||||||
|
"down",
|
||||||
|
"think",
|
||||||
|
"about",
|
||||||
|
"honey",
|
||||||
|
"and",
|
||||||
|
"the",
|
||||||
|
"sweet",
|
||||||
|
"sounds",
|
||||||
|
};
|
||||||
|
|
||||||
|
struct hashmap map;
|
||||||
|
hashmap_construct(&map);
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||||
|
hashmap_set(&map, keys[i], (void *)values[i]);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||||
|
assert(hashmap_has(&map, keys[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
for (size_t i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) {
|
||||||
|
switch (i) {
|
||||||
|
case 0:
|
||||||
|
assert(hashmap_get(&map, keys[i]) == values[10]);
|
||||||
|
break;
|
||||||
|
case 6:
|
||||||
|
assert(hashmap_get(&map, keys[i]) == values[11]);
|
||||||
|
break;
|
||||||
|
case 9:
|
||||||
|
assert(hashmap_get(&map, keys[i]) == values[12]);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
assert(hashmap_get(&map, keys[i]) == values[i]);
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
hashmap_destroy(&map);
|
||||||
}
|
}
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user