init
This commit is contained in:
commit
24be8185c6
18
.clang-format
Normal file
18
.clang-format
Normal file
@ -0,0 +1,18 @@
|
||||
BasedOnStyle: WebKit
|
||||
IndentWidth: 4
|
||||
ColumnLimit: 80
|
||||
|
||||
IndentCaseLabels: true
|
||||
InsertNewlineAtEOF: true
|
||||
AllowShortFunctionsOnASingleLine: None
|
||||
|
||||
BinPackArguments: false
|
||||
BinPackLongBracedList: false
|
||||
BinPackParameters: OnePerLine
|
||||
|
||||
AllowAllArgumentsOnNextLine: true
|
||||
AllowAllParametersOfDeclarationOnNextLine: true
|
||||
|
||||
PointerAlignment: Right
|
||||
QualifierAlignment: Right
|
||||
|
||||
1
.gitignore
vendored
Normal file
1
.gitignore
vendored
Normal file
@ -0,0 +1 @@
|
||||
build/
|
||||
29
Makefile
Normal file
29
Makefile
Normal file
@ -0,0 +1,29 @@
|
||||
|
||||
# Build with 16 parallel jobs unless the caller overrides it.
MAKEFLAGS += -j16

# Compiler used for both compiling and linking; override with `make CC=clang`.
CC = gcc
CFLAGS = -std=c17 -pedantic-errors -Wall -Wextra -Wconversion -fsanitize=address
LDFLAGS =

build_dir = build
obj_dir = $(build_dir)/obj

sources = \
	src/main.c \
	src/collections.c

target = jq

# One object (and one generated dependency file) per source file.
objects = $(sources:%.c=$(obj_dir)/%.o)

# `all` and `clean` are commands, not files: keep them working even if a file
# with one of those names appears in the tree.
.PHONY: all clean

all: $(build_dir)/$(target)

# CFLAGS is passed at link time as well so -fsanitize=address reaches the linker.
$(build_dir)/$(target): $(objects)
	$(CC) -o $@ $(CFLAGS) $(LDFLAGS) $^

# -MMD -MP writes a .d file next to each object so header edits trigger rebuilds.
$(obj_dir)/%.o: %.c
	@mkdir -p $(dir $@)
	$(CC) $< -c -o $@ -MMD -MP $(CFLAGS)

clean:
	rm -rf $(build_dir)

-include $(objects:.o=.d)
|
||||
|
||||
6
compile_flags.txt
Normal file
6
compile_flags.txt
Normal file
@ -0,0 +1,6 @@
|
||||
-xc
|
||||
-std=c17
|
||||
-pedantic-errors
|
||||
-Wall
|
||||
-Wextra
|
||||
-Wconversion
|
||||
152
src/collections.c
Normal file
152
src/collections.c
Normal file
@ -0,0 +1,152 @@
|
||||
#include "collections.h"
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// Appends one element to a growable array described by (*data, *capacity,
// *count).
//
// data      - address of the array pointer; a NULL array is allocated on first
//             use with room for 8 elements and *count is reset to 0.
// capacity  - number of element slots currently allocated; doubled when full.
// count     - number of elements in use; incremented on success.
// elem      - element to copy into the new slot, or NULL to leave the slot
//             uninitialized for the caller to fill in.
// elem_size - size of one element in bytes.
//
// Returns a pointer to the new slot, or NULL if memory could not be obtained.
// On failure *data, *capacity and *count are left unchanged. Pointers returned
// by earlier calls may be invalidated when the array grows.
void *array_push(void **data,
    size_t *capacity,
    size_t *count,
    void const *elem,
    size_t elem_size)
{
    size_t const initial_capacity = 8;

    if (!*data) {
        void *fresh = malloc(initial_capacity * elem_size);
        if (!fresh) {
            return NULL;
        }
        *data = fresh;
        *capacity = initial_capacity;
        *count = 0;
    } else if (*count >= *capacity) {
        if (*capacity > SIZE_MAX / 2) {
            return NULL;
        }
        size_t new_capacity = *capacity ? *capacity * 2 : initial_capacity;
        if (elem_size != 0 && new_capacity > SIZE_MAX / elem_size) {
            return NULL;
        }
        // Keep the old pointer until realloc succeeds so a failure cannot
        // leak or lose the existing array.
        void *grown = realloc(*data, new_capacity * elem_size);
        if (!grown) {
            return NULL;
        }
        *data = grown;
        *capacity = new_capacity;
    }

    void *slot = (unsigned char *)*data + *count * elem_size;
    if (elem) {
        memcpy(slot, elem, elem_size);
    }
    *count += 1;
    return slot;
}
|
||||
|
||||
// Guarantees that the array has room for one more element, allocating it on
// first use (8 slots, *count reset to 0) or doubling it when full.
// Returns 1 on success and 0 if memory could not be obtained, in which case
// the array is left untouched.
static int array_insert_reserve(void **data,
    size_t *capacity,
    size_t *count,
    size_t elem_size)
{
    size_t const initial_capacity = 8;

    if (!*data) {
        void *fresh = malloc(initial_capacity * elem_size);
        if (!fresh) {
            return 0;
        }
        *data = fresh;
        *capacity = initial_capacity;
        *count = 0;
        return 1;
    }
    if (*count < *capacity) {
        return 1;
    }
    if (*capacity > SIZE_MAX / 2) {
        return 0;
    }
    size_t new_capacity = *capacity ? *capacity * 2 : initial_capacity;
    if (elem_size != 0 && new_capacity > SIZE_MAX / elem_size) {
        return 0;
    }
    void *grown = realloc(*data, new_capacity * elem_size);
    if (!grown) {
        return 0;
    }
    *data = grown;
    *capacity = new_capacity;
    return 1;
}

// Inserts one element at position idx of a growable array, shifting the
// elements at idx and beyond up by one slot.
//
// data      - address of the array pointer (NULL means "not allocated yet").
// capacity  - number of element slots currently allocated.
// count     - number of elements in use; incremented on success.
// idx       - insertion position; any value >= *count appends at the end.
// elem      - element to copy into the slot, or NULL to leave the slot's
//             contents for the caller to fill in.
// elem_size - size of one element in bytes.
//
// Returns a pointer to the inserted slot, or NULL if memory could not be
// obtained (the array is then unchanged). Growing the array may invalidate
// pointers into it that were obtained earlier.
void *array_insert_at(void **data,
    size_t *capacity,
    size_t *count,
    size_t idx,
    void const *elem,
    size_t elem_size)
{
    // Reserve first: the slot address is only meaningful after any realloc.
    if (!array_insert_reserve(data, capacity, count, elem_size)) {
        return NULL;
    }
    if (idx > *count) {
        idx = *count;
    }

    unsigned char *slot = (unsigned char *)*data + idx * elem_size;
    // Regions overlap, so memmove; moves zero bytes when appending.
    memmove(slot + elem_size, slot, (*count - idx) * elem_size);
    *count += 1;
    if (elem) {
        memcpy(slot, elem, elem_size);
    }
    return slot;
}
|
||||
|
||||
// Hashes a NUL-terminated string with djb2: start at 5381 and fold each byte
// in as hash * 33 + byte. Arithmetic wraps modulo 2^64.
static uint64_t hash_key(char const *data)
{
    uint64_t digest = 5381;

    for (unsigned char const *p = (unsigned char const *)data; *p != 0; ++p) {
        digest = digest * 33 + *p;
    }

    return digest;
}
|
||||
|
||||
void hashmap_construct(struct hashmap *t)
|
||||
{
|
||||
*t = (struct hashmap) { NULL, 0, 0 };
|
||||
}
|
||||
|
||||
static size_t const bucket_size = 16;
|
||||
|
||||
static struct hash_entry *find_entry(struct hashmap *t, uint64_t hash)
|
||||
{
|
||||
for (size_t b_idx = 0; b_idx < t->buckets_count; ++b_idx) {
|
||||
struct hash_bucket *bucket = &t->buckets[b_idx];
|
||||
if (hash < bucket->first_hash || hash > bucket->last_hash) {
|
||||
continue;
|
||||
}
|
||||
for (size_t e_idx = 0; e_idx < bucket->count; ++e_idx) {
|
||||
struct hash_entry *entry = &bucket->entries[e_idx];
|
||||
if (entry->hash == hash) {
|
||||
return entry;
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static struct hash_bucket *insert_bucket_at(struct hashmap *t, size_t idx)
|
||||
{
|
||||
struct hash_bucket bucket = {
|
||||
.entries = malloc(sizeof(struct hash_entry) * bucket_size),
|
||||
.count = 0,
|
||||
.first_hash = 0,
|
||||
.last_hash = 0,
|
||||
};
|
||||
|
||||
return array_insert_at((void **)&t->buckets,
|
||||
&t->buckets_capacity,
|
||||
&t->buckets_count,
|
||||
idx,
|
||||
&bucket,
|
||||
sizeof(struct hash_bucket));
|
||||
}
|
||||
|
||||
static struct hash_entry *make_entry(struct hashmap *t, uint64_t hash)
|
||||
{
|
||||
for (size_t i = 0; i < t->buckets_count; ++i) {
|
||||
struct hash_bucket *bucket = &t->buckets[i];
|
||||
if (hash < bucket->first_hash) {
|
||||
if (bucket->count < bucket_size) {
|
||||
bucket->count += 1;
|
||||
bucket->first_hash = hash;
|
||||
return &bucket->entries[bucket->count - 1];
|
||||
}
|
||||
bucket = insert_bucket_at(t, i);
|
||||
bucket->first_hash = hash;
|
||||
bucket->last_hash = hash;
|
||||
bucket->entries[0].hash = hash;
|
||||
return &bucket->entries[0];
|
||||
}
|
||||
if (hash <= bucket->last_hash) {
|
||||
if (bucket->count >= bucket_size) {
|
||||
bucket = insert_bucket_at(t, i);
|
||||
bucket->first_hash = hash;
|
||||
bucket->last_hash = hash;
|
||||
bucket->entries[0].hash = hash;
|
||||
return &bucket->entries[0];
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void hashmap_set(struct hashmap *t, char const *key, void *value)
|
||||
{
|
||||
uint64_t hash = hash_key(key);
|
||||
|
||||
if (t->buckets_count == 0) {
|
||||
struct hash_bucket *bucket = insert_bucket_at(t, 0);
|
||||
bucket->entries[0] = (struct hash_entry) { hash, value };
|
||||
return;
|
||||
}
|
||||
|
||||
struct hash_entry *entry = find_entry(t, hash);
|
||||
if (!entry) {
|
||||
entry = make_entry(t, hash);
|
||||
}
|
||||
entry->value = value;
|
||||
return;
|
||||
}
|
||||
|
||||
void *hashmap_get(struct hashmap *t, char const *key)
|
||||
{
|
||||
uint64_t hash = hash_key(key);
|
||||
}
|
||||
43
src/collections.h
Normal file
43
src/collections.h
Normal file
@ -0,0 +1,43 @@
|
||||
#ifndef COLLECTIONS_H
|
||||
#define COLLECTIONS_H
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
|
||||
void *array_push(void **data,
|
||||
size_t *capacity,
|
||||
size_t *count,
|
||||
void const *elem,
|
||||
size_t elem_size);
|
||||
|
||||
void *array_insert_at(void **data,
|
||||
size_t *capacity,
|
||||
size_t *count,
|
||||
size_t idx,
|
||||
void const *elem,
|
||||
size_t elem_size);
|
||||
|
||||
// One key/value slot of the hashmap. The key string itself is not stored,
// only its hash.
struct hash_entry {
    uint64_t hash; // hash of the key this slot belongs to
    void *value; // caller-supplied pointer, stored as-is
};
|
||||
|
||||
// A group of entries covering an inclusive range of hash values. Lookups use
// the range to skip buckets that cannot contain a given hash.
struct hash_bucket {
    struct hash_entry *entries; // heap-allocated entry storage
    size_t count; // number of entries currently in use
    uint64_t first_hash; // lower bound (inclusive) of the hashes held
    uint64_t last_hash; // upper bound (inclusive) of the hashes held
};
|
||||
|
||||
// A map from string keys to `void *` values, stored as a growable array of
// buckets. Initialize with hashmap_construct() before use.
struct hashmap {
    struct hash_bucket *buckets; // bucket array, NULL while the map is empty
    size_t buckets_capacity; // number of bucket slots allocated
    size_t buckets_count; // number of buckets in use
};
|
||||
|
||||
void hashmap_construct(struct hashmap *t);
|
||||
void hashmap_destroy(struct hashmap *t);
|
||||
void hashmap_set(struct hashmap *t, char const *key, void *value);
|
||||
void *hashmap_get(struct hashmap *t, char const *key);
|
||||
|
||||
#endif
|
||||
240
src/main.c
Normal file
240
src/main.c
Normal file
@ -0,0 +1,240 @@
|
||||
#include <assert.h>
|
||||
#include <errno.h>
|
||||
#include <stdbool.h>
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
|
||||
// A position in the input text.
struct loc {
    size_t idx; // byte offset into the text
    int line; // line number (the tokenizer starts counting at 1)
    int col; // column number (the tokenizer starts counting at 1)
};
|
||||
|
||||
void report(
|
||||
struct loc loc, char const *message, char const *text, size_t text_len)
|
||||
{
|
||||
fprintf(stderr, "error: %s\n", message);
|
||||
if (!text)
|
||||
return;
|
||||
assert(text[loc.idx] != '\n');
|
||||
size_t line_begin_idx = loc.idx;
|
||||
while (line_begin_idx > 0 && text[line_begin_idx] != '\n') {
|
||||
line_begin_idx -= 1;
|
||||
}
|
||||
if (text[line_begin_idx] == '\n') {
|
||||
line_begin_idx += 1;
|
||||
}
|
||||
size_t line_end_idx = loc.idx + 1;
|
||||
while (line_end_idx < text_len && text[line_end_idx] != '\n') {
|
||||
line_end_idx += 1;
|
||||
}
|
||||
if (line_end_idx >= text_len || text[line_end_idx] == '\n') {
|
||||
line_end_idx -= 1;
|
||||
}
|
||||
int linenr_width = snprintf(NULL, 0, "%d", loc.line);
|
||||
static char const *spaces = " ";
|
||||
printf("%.*s|\n"
|
||||
"%d|%.*s\n"
|
||||
"%.*s|%.*s^\n"
|
||||
"%.*s|\n",
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.line,
|
||||
(int)(line_end_idx - line_begin_idx + 1),
|
||||
&text[line_begin_idx],
|
||||
linenr_width,
|
||||
spaces,
|
||||
loc.col - 1,
|
||||
spaces,
|
||||
linenr_width,
|
||||
spaces);
|
||||
}
|
||||
|
||||
// Token kinds produced by the tokenizer.
//
// The first six kinds are numbered from 0. Every kind from tt_int on is given
// the value of a character, which lets the tokenizer turn a single-character
// token into its kind with a plain cast.
enum tokty {
    tt_eof, // end of input
    tt_null, // keyword `null`
    tt_false, // keyword `false`
    tt_true, // keyword `true`
    tt_string, // double-quoted string
    tt_float, // number with a fractional part
    tt_int = '0', // integer; shares the code of the digit zero
    tt_comma = ',',
    tt_colon = ':',
    tt_lbracket = '[',
    tt_rbracket = ']',
    tt_lbrace = '{',
    tt_rbrace = '}',
};
|
||||
|
||||
struct tok {
|
||||
enum tokty ty;
|
||||
char const *ptr;
|
||||
size_t len;
|
||||
struct loc loc;
|
||||
};
|
||||
|
||||
// Cursor over an input buffer, advanced by tokenizer_next().
struct tokenizer {
    char const *text; // input buffer (not owned by the tokenizer)
    size_t len; // number of bytes in `text`
    size_t idx; // offset of the next unread byte
    int line; // line of the next unread byte
    int col; // column of the next unread byte
    bool failed; // set once any tokenizing error has been reported
};
|
||||
|
||||
static void t_step(struct tokenizer *t)
|
||||
{
|
||||
if (t->idx >= t->len)
|
||||
return;
|
||||
if (t->text[t->idx] == '\n') {
|
||||
t->line += 1;
|
||||
t->col = 1;
|
||||
} else {
|
||||
t->col += 1;
|
||||
}
|
||||
t->idx += 1;
|
||||
}
|
||||
|
||||
static struct tok t_tok(struct tokenizer *t, enum tokty ty, struct loc loc)
|
||||
{
|
||||
return (struct tok) { ty, &t->text[loc.idx], t->idx - loc.idx, loc };
|
||||
}
|
||||
|
||||
struct tok tokenizer_next(struct tokenizer *t)
|
||||
{
|
||||
struct loc loc = { t->idx, t->line, t->col };
|
||||
size_t *i = &t->idx;
|
||||
if (*i >= t->len) {
|
||||
return t_tok(t, tt_eof, loc);
|
||||
}
|
||||
bool matched = false;
|
||||
while (*i < t->len && strchr(" \t\r\n", t->text[*i]) != NULL) {
|
||||
matched = true;
|
||||
*i += 1;
|
||||
}
|
||||
if (matched) {
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
if (strchr(",:[]{}0", t->text[*i]) != NULL) {
|
||||
enum tokty ty = (enum tokty)t->text[*i];
|
||||
*i += 1;
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
while (*i < t->len && t->text[*i] >= 'a' && t->text[*i] <= 'z') {
|
||||
matched = true;
|
||||
*i += 1;
|
||||
}
|
||||
if (matched) {
|
||||
char const *kws[] = { "null", "false", "true" };
|
||||
enum tokty tys[] = { tt_null, tt_false, tt_true };
|
||||
for (size_t kw_i = 0; kw_i < sizeof(kws) / sizeof(kws[0]); ++kw_i) {
|
||||
if (strncmp(kws[kw_i], &t->text[loc.idx], *i - loc.idx)) {
|
||||
return t_tok(t, tys[kw_i], loc);
|
||||
}
|
||||
}
|
||||
report(loc, "invalid identifier", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
if (t->text[*i] >= '1' && t->text[*i] <= '9') {
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
*i += 1;
|
||||
}
|
||||
enum tokty ty = tt_int;
|
||||
if (*i < t->len && t->text[*i] == '.') {
|
||||
ty = tt_float;
|
||||
*i += 1;
|
||||
while (*i < t->len && t->text[*i] >= '0' && t->text[*i] <= '9') {
|
||||
*i += 1;
|
||||
}
|
||||
}
|
||||
return t_tok(t, ty, loc);
|
||||
}
|
||||
if (t->text[*i] == '\"') {
|
||||
i += 1;
|
||||
while (*i < t->len && t->text[*i] != '\"') {
|
||||
if (t->text[*i] != '\\') {
|
||||
*i += 1;
|
||||
if (*i >= t->len)
|
||||
break;
|
||||
}
|
||||
*i += 1;
|
||||
}
|
||||
if (*i >= t->len && t->text[*i] != '\"') {
|
||||
report(loc, "malformed string", t->text, t->len);
|
||||
t->failed = true;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
*i += 1;
|
||||
return t_tok(t, tt_string, loc);
|
||||
}
|
||||
report(loc, "illegal character", t->text, t->len);
|
||||
t->failed = true;
|
||||
*i += 1;
|
||||
return tokenizer_next(t);
|
||||
}
|
||||
|
||||
// Kinds of value a `struct val` can hold. Numbered from 0 in this order.
enum valty {
    vt_null,
    vt_number,
    vt_string,
    vt_array,
    vt_object,
};
|
||||
|
||||
struct val {
|
||||
enum valty ty;
|
||||
union {
|
||||
int64_t int_val;
|
||||
double float_val;
|
||||
char *string_val;
|
||||
struct {
|
||||
struct val *array_data;
|
||||
size_t *array_count;
|
||||
};
|
||||
};
|
||||
};
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
if (argc < 3) {
|
||||
fprintf(stderr, "error: incorrect arguments\n");
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
char const *pattern = argv[1];
|
||||
char const *filename = argv[2];
|
||||
|
||||
FILE *file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr,
|
||||
"error: could not open file (%s) \"%s\"\n",
|
||||
strerror(errno),
|
||||
filename);
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
if (fseek(file, 0, SEEK_END) != 0) {
|
||||
fprintf(stderr, "error: could not seek (%s)\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
long ftell_result = ftell(file);
|
||||
if (ftell_result < 0) {
|
||||
fprintf(stderr, "error: could not tell (%s)\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
size_t file_size = (size_t)ftell_result;
|
||||
char *text = malloc(file_size + 1);
|
||||
|
||||
size_t bytes_read = fread(text, 1, file_size, file);
|
||||
if (bytes_read != file_size) {
|
||||
fprintf(stderr, "error: could not read (%s)\n", strerror(errno));
|
||||
return EXIT_FAILURE;
|
||||
}
|
||||
|
||||
struct tokenizer tokenizer = { text, file_size, 0, 1, 1 };
|
||||
|
||||
printf("hello\n");
|
||||
}
|
||||
Loading…
x
Reference in New Issue
Block a user