This commit is contained in:
sfja 2026-03-18 23:55:33 +01:00
commit 24be8185c6
8 changed files with 493 additions and 0 deletions

18
.clang-format Normal file
View File

@ -0,0 +1,18 @@
BasedOnStyle: WebKit
IndentWidth: 4
ColumnLimit: 80
IndentCaseLabels: true
InsertNewlineAtEOF: true
AllowShortFunctionsOnASingleLine: None
BinPackArguments: false
BinPackLongBracedList: false
BinPackParameters: OnePerLine
AllowAllArgumentsOnNextLine: true
AllowAllParametersOfDeclarationOnNextLine: true
PointerAlignment: Right
QualifierAlignment: Right

1
.gitignore vendored Normal file
View File

@ -0,0 +1 @@
build/

29
Makefile Normal file
View File

@ -0,0 +1,29 @@
# Builds build/jq from the C sources listed in `sources`.
# Object files and their generated dependency (.d) files live under build/obj.

# Always run a parallel build.
MAKEFLAGS += -j16

# Compiler is overridable from the command line (e.g. `make CC=clang`).
CC = gcc
CFLAGS = -std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
LDFLAGS =

build_dir = build
obj_dir = $(build_dir)/obj

sources = \
	src/main.c \
	src/collections.c

target = jq

# `all` and `clean` are commands, not files; without .PHONY a stray file with
# one of these names would silently disable the rule.
.PHONY: all clean

all: $(build_dir)/$(target)

# CFLAGS is passed at link time too so that -fsanitize=address pulls in the
# sanitizer runtime.
$(build_dir)/$(target): $(sources:%.c=$(obj_dir)/%.o)
	$(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^

# -MMD -MP writes a .d file next to each object so header changes trigger
# rebuilds.
$(obj_dir)/%.o: %.c
	@mkdir -p $(dir $@)
	$(CC) $< -c -o $@ -MMD -MP $(CFLAGS)

clean:
	rm -rf $(build_dir)

-include $(sources:%.c=$(obj_dir)/%.d)

6
compile_flags.txt Normal file
View File

@ -0,0 +1,6 @@
-xc
-std=c17
-pedantic-errors
-Wall
-Wextra
-Wconversion

4
data.json Normal file
View File

@ -0,0 +1,4 @@
{
"foo": "bar"
}

152
src/collections.c Normal file
View File

@ -0,0 +1,152 @@
#include "collections.h"
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
// Appends one element to a type-erased growable array.
//
// data      in/out: start of the storage; NULL means "not allocated yet".
// capacity  in/out: number of element slots the storage can hold.
// count     in/out: number of elements currently stored.
// elem      bytes to copy into the new slot, or NULL to leave the slot
//           uninitialised for the caller to fill in.
// elem_size size of one element in bytes.
//
// Returns a pointer to the new slot, or NULL if memory could not be
// allocated, in which case *data, *capacity and *count are left untouched.
void *array_push(void **data,
    size_t *capacity,
    size_t *count,
    void const *elem,
    size_t elem_size)
{
    if (!*data) {
        size_t const initial_capacity = 8;
        void *fresh = malloc(initial_capacity * elem_size);
        if (!fresh) {
            return NULL;
        }
        *data = fresh;
        *capacity = initial_capacity;
        *count = 0;
    } else if (*count + 1 >= *capacity) {
        // Grow one element early so that *count < *capacity still holds after
        // the push; array_insert_at relies on having a spare slot to shift
        // into.
        size_t grown_capacity = *capacity != 0 ? *capacity * 2 : 8;
        // Go through a temporary: assigning realloc's result straight to
        // *data would lose the only pointer to the block on failure.
        void *grown = realloc(*data, grown_capacity * elem_size);
        if (!grown) {
            return NULL;
        }
        *data = grown;
        *capacity = grown_capacity;
    }
    void *ptr = &((unsigned char *)*data)[*count * elem_size];
    if (elem) {
        memcpy(ptr, elem, elem_size);
    }
    // The element is part of the array only once the count reflects it.
    *count += 1;
    return ptr;
}
// Inserts one element at position idx of a type-erased growable array,
// shifting the elements at [idx, *count) up by one slot.
//
// When idx >= *count the call is forwarded to array_push (append).
// elem may be NULL, in which case the slot is left for the caller to fill.
//
// Returns a pointer to the slot at idx (or array_push's result when
// appending), or NULL if the storage could not be grown; on failure the
// array is left unchanged.
void *array_insert_at(void **data,
    size_t *capacity,
    size_t *count,
    size_t idx,
    void const *elem,
    size_t elem_size)
{
    if (idx >= *count) {
        return array_push(data, capacity, count, elem, elem_size);
    }
    // Shifting writes one slot past the current last element, so make sure
    // that slot exists before touching memory.
    if (*count + 1 >= *capacity) {
        size_t grown_capacity = *capacity * 2;
        if (grown_capacity <= *count) {
            grown_capacity = *count + 1;
        }
        void *grown = realloc(*data, grown_capacity * elem_size);
        if (!grown) {
            return NULL;
        }
        *data = grown;
        *capacity = grown_capacity;
    }
    // Compute the slot addresses only after a possible reallocation.
    unsigned char *base = *data;
    void *src_ptr = &base[idx * elem_size];
    void *dest_ptr = &base[(idx + 1) * elem_size];
    // The ranges overlap, hence memmove rather than memcpy.
    memmove(dest_ptr, src_ptr, (*count - idx) * elem_size);
    *count += 1;
    if (elem) {
        memcpy(src_ptr, elem, elem_size);
    }
    return src_ptr;
}
// Computes the djb2 hash of a NUL-terminated string: starting from 5381,
// every byte folds in as `digest * 33 + byte` (wrapping in 64 bits).
static uint64_t hash_key(char const *data)
{
    uint64_t digest = 5381;
    unsigned char const *cursor = (unsigned char const *)data;
    for (; *cursor != 0; ++cursor) {
        digest = digest * 33 + *cursor;
    }
    return digest;
}
// Puts *t into the empty state: no bucket storage, zero capacity and zero
// buckets. Nothing is allocated here.
void hashmap_construct(struct hashmap *t)
{
    t->buckets = NULL;
    t->buckets_capacity = 0;
    t->buckets_count = 0;
}
// Number of entry slots allocated for every bucket by insert_bucket_at, and
// the fill limit make_entry compares bucket->count against.
static size_t const bucket_size = 16;
static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
{
for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) {
struct hash_bucket *bucket = &t->buckets[b_idx];
if (hash < bucket->first_hash || hash > bucket->last_hash) {
continue;
}
for (size_t e_idx = 0; e_idx < bucket->count; ++e_idx) {
struct hash_entry *entry = &bucket->entries[e_idx];
if (entry->hash == hash) {
return entry;
}
}
}
return NULL;
}
// Inserts a new, empty bucket at position idx of t->buckets.
//
// The bucket owns freshly allocated storage for bucket_size entries; its
// count and hash range start at zero and must be filled in by the caller.
//
// Returns a pointer to the bucket inside t->buckets, or NULL if memory could
// not be allocated (nothing is leaked in that case).
static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
{
    struct hash_entry *entries = malloc(bucket_size * sizeof *entries);
    if (!entries) {
        return NULL;
    }
    struct hash_bucket bucket = {
        .entries = entries,
        .count = 0,
        .first_hash = 0,
        .last_hash = 0,
    };
    struct hash_bucket *slot = array_insert_at((void **)&t->buckets,
        &t->buckets_capacity,
        &t->buckets_count,
        idx,
        &bucket,
        sizeof(struct hash_bucket));
    if (!slot) {
        // The bucket never made it into the table, so nothing else will
        // release its entry storage.
        free(entries);
    }
    return slot;
}
static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
{
for (size_t i = 0; i < t->buckets_count; ++i) {
struct hash_bucket *bucket = &t->buckets[i];
if (hash < bucket->first_hash) {
if (bucket->count < bucket_size) {
bucket->count += 1;
bucket->first_hash = hash;
return &bucket->entries[bucket->count - 1];
}
bucket = insert_bucket_at(t, i);
bucket->first_hash = hash;
bucket->last_hash = hash;
bucket->entries[0].hash = hash;
return &bucket->entries[0];
}
if (hash <= bucket->last_hash) {
if (bucket->count >= bucket_size) {
bucket = insert_bucket_at(t, i);
bucket->first_hash = hash;
bucket->last_hash = hash;
bucket->entries[0].hash = hash;
return &bucket->entries[0];
}
}
}
}
// Associates `value` with `key`, replacing the value of an existing entry
// with the same hash.
//
// Only the 64-bit hash of the key is stored, so two keys with the same hash
// share one entry. The map does not take ownership of `value`. If memory for
// a new entry cannot be allocated the map is left unchanged.
void hashmap_set(struct hashmap *t, char const *key, void *value)
{
    uint64_t hash = hash_key(key);
    if (t->buckets_count == 0) {
        struct hash_bucket *bucket = insert_bucket_at(t, 0);
        if (!bucket || !bucket->entries) {
            return;
        }
        bucket->entries[0] = (struct hash_entry) { hash, value };
        // Without the count and range the entry would be invisible to
        // find_entry.
        bucket->count = 1;
        bucket->first_hash = hash;
        bucket->last_hash = hash;
        return;
    }
    struct hash_entry *entry = find_entry(t, hash);
    if (!entry) {
        entry = make_entry(t, hash);
    }
    if (!entry) {
        return;
    }
    entry->hash = hash;
    entry->value = value;
}
void *hashmap_get(struct hashmap *t, char const *key)
{
uint64_t hash = hash_key(key);
}

43
src/collections.h Normal file
View File

@ -0,0 +1,43 @@
#ifndef COLLECTIONS_H
#define COLLECTIONS_H
#include <stddef.h>
#include <stdint.h>
// Type-erased growable array: append.
// `data`, `capacity` and `count` are in/out and describe the caller's array;
// a NULL *data makes the function allocate the initial storage.
// When `elem` is non-NULL, `elem_size` bytes are copied from it into the slot
// at the index *count had on entry. Returns a pointer to that slot.
void *array_push(void **data,
size_t *capacity,
size_t *count,
void const *elem,
size_t elem_size);
// Type-erased growable array: insert at index `idx`.
// When idx >= *count the call is forwarded to array_push. Otherwise the
// elements at [idx, *count) are moved up by one slot, *count is incremented
// and, when `elem` is non-NULL, `elem_size` bytes are copied into slot idx.
// Returns a pointer to slot idx (or array_push's result).
void *array_insert_at(void **data,
size_t *capacity,
size_t *count,
size_t idx,
void const *elem,
size_t elem_size);
// One stored association. The key string itself is not kept; entries are
// identified by the hash of the key alone.
struct hash_entry {
// Hash of the key, used as the entry's identity during lookup.
uint64_t hash;
// Caller-supplied value pointer, stored as given.
void *value;
};
// A fixed-size group of entries together with the hash range used to decide
// whether the bucket needs to be searched.
struct hash_bucket {
// Heap-allocated entry storage.
struct hash_entry *entries;
// Number of entries in use.
size_t count;
// Lower and upper bound (inclusive) compared against a hash during lookup;
// hashes outside [first_hash, last_hash] skip this bucket.
uint64_t first_hash;
uint64_t last_hash;
};
// Map from string keys (by hash) to void pointers, stored as a growable
// array of buckets. Initialise with hashmap_construct before use.
struct hashmap {
// Bucket array managed through array_insert_at; NULL while empty.
struct hash_bucket *buckets;
// Allocated slots in `buckets`.
size_t buckets_capacity;
// Buckets currently in use.
size_t buckets_count;
};
// Resets *t to the empty state (no buckets, zero capacity and count).
void hashmap_construct(struct hashmap *t);
// NOTE(review): declared here, but no definition appears in the sources seen
// during review; presumably meant to release the map's storage - confirm.
void hashmap_destroy(struct hashmap *t);
// Stores `value` under the hash of the NUL-terminated string `key`.
void hashmap_set(struct hashmap *t, char const *key, void *value);
// Lookup counterpart of hashmap_set; `key` is a NUL-terminated string.
void *hashmap_get(struct hashmap *t, char const *key);
#endif

240
src/main.c Normal file
View File

@ -0,0 +1,240 @@
#include <assert.h>
#include <errno.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A position in the input text, captured when a token starts.
struct loc {
// Byte offset into the text buffer.
size_t idx;
// Line and column counters copied from the tokenizer; main starts both at 1.
int line;
int col;
};
// Prints a diagnostic to stderr: the message, followed by the offending
// source line with a caret under column loc.col.
//
// loc       position the diagnostic refers to.
// message   text printed after "error: ".
// text      the whole input, or NULL to print the message only.
// text_len  number of bytes in `text`.
//
// The snippet is skipped when loc.idx does not address a byte of `text`.
void report(
    struct loc loc, char const *message, char const *text, size_t text_len)
{
    fprintf(stderr, "error: %s\n", message);
    if (!text || loc.idx >= text_len) {
        return;
    }
    assert(text[loc.idx] != '\n');

    // Start of the line: first byte after the previous newline, or 0.
    size_t line_begin_idx = loc.idx;
    while (line_begin_idx > 0 && text[line_begin_idx - 1] != '\n') {
        line_begin_idx -= 1;
    }
    // End of the line (exclusive): the next newline or the end of the text.
    size_t line_end_idx = loc.idx;
    while (line_end_idx < text_len && text[line_end_idx] != '\n') {
        line_end_idx += 1;
    }
    int line_len = (int)(line_end_idx - line_begin_idx);

    int linenr_width = snprintf(NULL, 0, "%d", loc.line);
    if (linenr_width < 0) {
        linenr_width = 0;
    }
    int caret_pad = loc.col > 1 ? loc.col - 1 : 0;

    // "%*s" with an empty string pads to any width, so the gutter and caret
    // stay aligned however large the line number or column is. The snippet
    // goes to stderr like the message so the two cannot be reordered by
    // stdout buffering.
    fprintf(stderr,
        "%*s|\n"
        "%d|%.*s\n"
        "%*s|%*s^\n"
        "%*s|\n",
        linenr_width,
        "",
        loc.line,
        line_len,
        &text[line_begin_idx],
        linenr_width,
        "",
        caret_pad,
        "",
        linenr_width,
        "");
}
// Token kinds produced by the tokenizer.
// The first group takes small sequential values. The second group reuses the
// token's character as its value, which lets the tokenizer cast such a
// character directly to a token kind.
enum tokty {
tt_eof,
tt_null,
tt_false,
tt_true,
tt_string,
tt_float,
// Also the value obtained by casting the character '0'.
tt_int = '0',
tt_comma = ',',
tt_colon = ':',
tt_lbracket = '[',
tt_rbracket = ']',
tt_lbrace = '{',
tt_rbrace = '}',
};
// A token: its kind plus the slice of the input it covers.
struct tok {
enum tokty ty;
// Points into the tokenizer's text at the token's first byte; not
// NUL-terminated, use `len`.
char const *ptr;
// Number of bytes from `ptr` that belong to the token.
size_t len;
// Position at which the token started.
struct loc loc;
};
// Tokenizer state over a caller-provided text buffer.
struct tokenizer {
// Input buffer and its length in bytes.
char const *text;
size_t len;
// Offset of the next unread byte.
size_t idx;
// Line/column counters for the byte at `idx`; main initialises both to 1.
int line;
int col;
// Set to true once any tokenization error has been reported.
bool failed;
};
// Consumes one byte of input and keeps the line/column counters in sync:
// a newline starts a new line at column 1, any other byte moves one column
// to the right. Does nothing once the end of the text has been reached.
static void t_step(struct tokenizer *t)
{
    if (t->idx < t->len) {
        bool const at_newline = t->text[t->idx] == '\n';
        if (at_newline) {
            t->line += 1;
        }
        t->col = at_newline ? 1 : t->col + 1;
        t->idx += 1;
    }
}
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
{
return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
}
struct tok tokenizer_next(struct tokenizer *t)
{
struct loc loc = { t->idx, t->line, t->col };
size_t *i = &t->idx;
if (*i >= t->len) {
return t_tok(t, tt_eof, loc);
}
bool matched = false;
while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
matched = true;
*i += 1;
}
if (matched) {
return tokenizer_next(t);
}
if (strchr(",:[]{}0", t->text[*i]) != NULL) {
enum tokty ty = (enum tokty)t->text[*i];
*i += 1;
return t_tok(t, ty, loc);
}
while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
matched = true;
*i += 1;
}
if (matched) {
char const *kws[] = { "null", "false", "true" };
enum tokty tys[] = { tt_null, tt_false, tt_true };
for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) {
return t_tok(t, tys[kw_i], loc);
}
}
report(loc, "invalid identifier", t->text, t->len);
t->failed = true;
return tokenizer_next(t);
}
if (t->text[*i] >= '1' && t->text[*i] <= '9') {
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
*i += 1;
}
enum tokty ty = tt_int;
if (*i < t->len && t->text[*i] == '.') {
ty = tt_float;
*i += 1;
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
*i += 1;
}
}
return t_tok(t, ty, loc);
}
if (t->text[*i] == '\"') {
i += 1;
while (*i < t->len && t->text[*i] != '\"') {
if (t->text[*i] != '\\') {
*i += 1;
if (*i >= t->len)
break;
}
*i += 1;
}
if (*i >= t->len && t->text[*i] != '\"') {
report(loc, "malformed string", t->text, t->len);
t->failed = true;
return tokenizer_next(t);
}
*i += 1;
return t_tok(t, tt_string, loc);
}
report(loc, "illegal character", t->text, t->len);
t->failed = true;
*i += 1;
return tokenizer_next(t);
}
// Kinds of parsed value; selects how a struct val is to be interpreted.
// NOTE(review): there is no boolean kind although the tokenizer has
// tt_false/tt_true - confirm whether one is still to be added.
enum valty {
vt_null,
vt_number,
vt_string,
vt_array,
vt_object,
};
// A parsed value: a kind tag plus a union holding the payload.
struct val {
enum valty ty;
union {
// NOTE(review): int_val and float_val share storage and vt_number is the
// only numeric kind, so nothing visible records which one is active.
int64_t int_val;
double float_val;
char *string_val;
struct {
struct val *array_data;
// NOTE(review): declared as a pointer to a count rather than a count;
// confirm this is intended.
size_t *array_count;
};
};
};
int main(int argc, char *argv[])
{
if (argc < 3) {
fprintf(stderr, "error: incorrect arguments\n");
return EXIT_FAILURE;
}
char const *pattern = argv[1];
char const *filename = argv[2];
FILE *file = fopen(filename, "r");
if (!file) {
fprintf(stderr,
"error: could not open file (%s) \"%s\"\n",
strerror(errno),
filename);
return EXIT_FAILURE;
}
if (fseek(file, 0, SEEK_END) != 0) {
fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
return EXIT_FAILURE;
}
long ftell_result = ftell(file);
if (ftell_result < 0) {
fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
return EXIT_FAILURE;
}
size_t file_size = (size_t)ftell_result;
char *text = malloc(file_size + 1);
size_t bytes_read = fread(text, 1, file_size, file);
if (bytes_read != file_size) {
fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
return EXIT_FAILURE;
}
struct tokenizer tokenizer = { text, file_size, 0, 1, 1 };
printf("hello\n");
}