From 0b9398f0158d9a874d3e6c1fc343e452abb2b4d8 Mon Sep 17 00:00:00 2001 From: sfja Date: Fri, 4 Apr 2025 00:26:55 +0200 Subject: [PATCH] emit object header --- asm/build_header.c | 248 +++++++++++++++++++++++++++++++++++++++++++++ asm/build_header.h | 25 +++++ asm/main.c | 150 +++++++-------------------- readme.txt | 44 ++++++++ 4 files changed, 354 insertions(+), 113 deletions(-) create mode 100644 asm/build_header.c create mode 100644 asm/build_header.h create mode 100644 readme.txt diff --git a/asm/build_header.c b/asm/build_header.c new file mode 100644 index 0000000..4037d5d --- /dev/null +++ b/asm/build_header.c @@ -0,0 +1,248 @@ +#include "build_header.h" +#include "report.h" +#include "resolve.h" +#include +#include +#include +#include +#include + +typedef struct { + char* ident; + Loc loc; + uint16_t ip; +} GlobalSym; + +typedef struct { + size_t id; + char* ident; +} ExternSym; + +typedef struct { + size_t id; + uint16_t ip; +} ExternRef; + +#define DEFINE_VEC_FIELDS(T, NAME) \ + size_t NAME##_capacity; \ + T* NAME; \ + size_t NAME##_size; + +struct HeaderBuilder { + DEFINE_VEC_FIELDS(GlobalSym, global_syms) + DEFINE_VEC_FIELDS(ExternSym, extern_syms) + DEFINE_VEC_FIELDS(ExternRef, extern_refs) + size_t program_size; +}; + +#define VEC_CTOR_INIT(T, NAME, CAPACITY) \ + .NAME##_capacity = (CAPACITY), .NAME = malloc(sizeof(T) * (CAPACITY)), \ + .NAME##_size = 0 + +HeaderBuilder* header_builder_new(void) +{ + const size_t global_syms_capacity = 8; + const size_t extern_syms_capacity = 8; + const size_t extern_refs_capacity = 8; + + HeaderBuilder* builder = malloc(sizeof(HeaderBuilder)); + + *builder = (HeaderBuilder) { + VEC_CTOR_INIT(GlobalSym, global_syms, 8), + VEC_CTOR_INIT(ExternSym, extern_syms, 8), + VEC_CTOR_INIT(ExternRef, extern_refs, 8), + .program_size = 0, + }; + + return builder; +} + +void header_builder_free(HeaderBuilder* builder) +{ + for (size_t i = 0; i < builder->global_syms_size; ++i) { + free(builder->global_syms[i].ident); + } + 
free(builder->global_syms); + + for (size_t i = 0; i < builder->extern_syms_size; ++i) { + free(builder->extern_syms[i].ident); + } + free(builder->extern_syms); + + free(builder->extern_refs); + free(builder); +} + +#define VEC_PUSH(STRUCT, T, NAME, ...) \ + if ((STRUCT)->NAME##_size + 1 > (STRUCT)->NAME##_capacity) { \ + (STRUCT)->NAME##_capacity *= 2; \ + (STRUCT)->NAME \ + = realloc((STRUCT)->NAME, sizeof(T) * (STRUCT)->NAME##_capacity); \ + } \ + (STRUCT)->NAME[(STRUCT)->NAME##_size++] = (T) { __VA_ARGS__ }; + +void header_builder_add_global_sym( + HeaderBuilder* builder, char* ident, Loc loc, uint16_t ip) +{ + VEC_PUSH(builder, GlobalSym, global_syms, ident, loc, ip); +} + +void header_builder_add_extern_sym( + HeaderBuilder* builder, size_t id, char* ident) +{ + VEC_PUSH(builder, ExternSym, extern_syms, id, ident); +} + +void header_builder_add_extern_ref( + HeaderBuilder* builder, size_t id, uint16_t ip) +{ + VEC_PUSH(builder, ExternRef, extern_refs, id, ip); +} + +int header_builder_resolve_global_syms( + HeaderBuilder* builder, IdentResolver* resolver, Reporter* rep) +{ + int result = 0; + for (size_t i = 0; i < builder->global_syms_size; ++i) { + const IdentResol* res + = ident_resolver_resolve(resolver, builder->global_syms[i].ident); + if (res == NULL) { + REPORTF_ERROR( + "undefined global '%s'", builder->global_syms[i].ident); + REPORTF_INFO( + "'%s' declared global here", builder->global_syms[i].ident); + reporter_print_loc(rep, builder->global_syms[i].loc); + result |= -1; + continue; + } else if (res->ty != IdentResolTy_Label) { + REPORTF_ERROR("'%s' cannot be declared global", res->ident); + reporter_print_loc(rep, res->loc); + REPORTF_INFO( + "'%s' declared global here", builder->global_syms[i].ident); + reporter_print_loc(rep, builder->global_syms[i].loc); + result |= -1; + continue; + } + builder->global_syms[i].ip = res->ip; + } + return result; +} + +typedef struct { + FILE* fp; + const char* filename; +} Writer; + +static inline size_t 
write_words( + Writer* writer, const uint16_t* values, size_t values_size) +{ + size_t amount_written + = fwrite(values, sizeof(uint16_t), values_size, writer->fp); + if (amount_written != values_size) { + REPORTF_ERROR("could not write to output file '%s': %s", + writer->filename, + strerror(errno)); + return 0; + } + return amount_written; +} + +static inline size_t write_word(Writer* writer, uint16_t value) +{ + return write_words(writer, &value, 1); +} + +int header_builder_write(const HeaderBuilder* builder, + FILE* fp, + size_t* total_written, + const char* filename) +{ + typedef uint16_t W; + + size_t header_size = 5 + 1 + 1 + 1 + 1 + 1; + for (size_t i = 0; i < builder->global_syms_size; ++i) + header_size += 1 + 1 + strlen(builder->global_syms[i].ident); + for (size_t i = 0; i < builder->extern_syms_size; ++i) + header_size += 1 + 1 + strlen(builder->extern_syms[i].ident); + for (size_t i = 0; i < builder->extern_refs_size; ++i) + header_size += 1 + 1; + header_size *= 2; + +#define TRY_WRITE_WORDS(VALUES, VALUES_SIZE) \ + do { \ + if ((res = write_words(w, (VALUES), (VALUES_SIZE))) == 0) \ + return -1; \ + *total_written += res; \ + } while (0) +#define TRY_WRITE_WORD(VALUE) \ + do { \ + if ((res = write_word(w, (VALUE))) == 0) \ + return -1; \ + *total_written += res; \ + } while (0) + + Writer writer = { fp, filename }; + Writer* w = &writer; + + *total_written = 0; + size_t res; + + const char* ident = "vc3-object"; + TRY_WRITE_WORDS((W*)ident, 5); + TRY_WRITE_WORD((W)header_size); + TRY_WRITE_WORD((W)builder->global_syms_size); + for (size_t i = 0; i < builder->global_syms_size; ++i) { + const GlobalSym* sym = &builder->global_syms[i]; + TRY_WRITE_WORD(sym->ip); + size_t ident_len = strlen(sym->ident); + TRY_WRITE_WORD((W)ident_len); + // This is safe because of null termination. 
+ TRY_WRITE_WORDS((W*)sym->ident, (ident_len + 1) / 2); + } + TRY_WRITE_WORD((W)builder->extern_syms_size); + for (size_t i = 0; i < builder->extern_syms_size; ++i) { + const ExternSym* sym = &builder->extern_syms[i]; + TRY_WRITE_WORD((W)sym->id); + size_t ident_len = strlen(sym->ident); + TRY_WRITE_WORD((W)ident_len); + // This is safe because of null termination. + TRY_WRITE_WORDS((W*)sym->ident, (ident_len + 1) / 2); + } + TRY_WRITE_WORD((W)builder->extern_refs_size); + for (size_t i = 0; i < builder->extern_refs_size; ++i) { + const ExternRef* ref = &builder->extern_refs[i]; + TRY_WRITE_WORD((W)ref->id); + TRY_WRITE_WORD(ref->ip); + } + TRY_WRITE_WORD((uint16_t)(builder->program_size * 2)); + +#undef TRY_WRITE_WORDS +#undef TRY_WRITE_WORD + + return 0; +} + +void header_builder_print(const HeaderBuilder* builder) +{ + printf("globals:\n"); + printf("address symbol\n"); + printf("-----------------------\n"); + for (size_t i = 0; i < builder->global_syms_size; ++i) { + GlobalSym* sym = &builder->global_syms[i]; + printf("%04x, %s\n", sym->ip, sym->ident); + } + printf("\nexterns:\n"); + printf("id symbol\n"); + printf("--------------------\n"); + for (size_t i = 0; i < builder->extern_syms_size; ++i) { + ExternSym* sym = &builder->extern_syms[i]; + printf("%-3ld %s\n", sym->id, sym->ident); + } + printf("\nextern uses:\n"); + printf("id address\n"); + printf("-----------\n"); + for (size_t i = 0; i < builder->extern_refs_size; ++i) { + ExternRef* ref = &builder->extern_refs[i]; + printf("%3ld %04x\n", ref->id, ref->ip); + } +} diff --git a/asm/build_header.h b/asm/build_header.h new file mode 100644 index 0000000..c09badf --- /dev/null +++ b/asm/build_header.h @@ -0,0 +1,25 @@ +#pragma once + +#include "report.h" +#include "resolve.h" +#include +#include +#include + +typedef struct HeaderBuilder HeaderBuilder; + +HeaderBuilder* header_builder_new(void); +void header_builder_free(HeaderBuilder* builder); +void header_builder_add_global_sym( + HeaderBuilder* 
builder, char* ident, Loc loc, uint16_t ip); +void header_builder_add_extern_sym( + HeaderBuilder* builder, size_t id, char* ident); +void header_builder_add_extern_ref( + HeaderBuilder* builder, size_t id, uint16_t ip); +int header_builder_resolve_global_syms( + HeaderBuilder* builder, IdentResolver* resolver, Reporter* rep); +int header_builder_write(const HeaderBuilder* builder, + FILE* fp, + size_t* total_written, + const char* filename); +void header_builder_print(const HeaderBuilder* builder); diff --git a/asm/main.c b/asm/main.c index 533cb67..071d23d 100644 --- a/asm/main.c +++ b/asm/main.c @@ -1,7 +1,8 @@ -#include "asm/report.h" #include "assemble.h" +#include "build_header.h" #include "eval.h" #include "parse.h" +#include "report.h" #include "resolve.h" #include "str.h" #include @@ -18,22 +19,6 @@ typedef struct { const char* output_file; } Args; -typedef struct { - char* ident; - Loc loc; - uint16_t ip; -} GlobalSym; - -typedef struct { - size_t id; - char* ident; -} ExternSym; - -typedef struct { - size_t id; - uint16_t ip; -} ExternRef; - static inline Args parse_args(int argc, char** argv); static inline char* read_text_file(const char* filename); static inline int include_file(IdentResolver* resolver, @@ -71,19 +56,13 @@ int main(int argc, char** argv) OperandEvaluator evaluator; operand_evaluator_construct(&evaluator, &resolver, &rep); + HeaderBuilder* header_builder = header_builder_new(); + size_t lines_capacity = 1024; PLine** lines = malloc(sizeof(PLine*) * lines_capacity); size_t lines_size = 0; - size_t global_symbols_capacity = 8; - GlobalSym* global_symbols - = malloc(sizeof(GlobalSym) * global_symbols_capacity); - size_t global_symbols_size = 0; - - size_t extern_symbols_capacity = 8; - ExternSym* extern_symbols - = malloc(sizeof(ExternSym) * extern_symbols_capacity); - size_t extern_symbols_size = 0; + size_t extern_ids = 0; while (!parser_done(&parser)) { PStmt* stmt = parser_next_stmt(&parser); @@ -101,16 +80,8 @@ int main(int argc, 
char** argv) free(stmt); break; case PStmtTy_Global: - if (global_symbols_size + 1 > global_symbols_capacity) { - global_symbols_capacity *= 2; - global_symbols = realloc(global_symbols, - sizeof(GlobalSym) * global_symbols_capacity); - } - global_symbols[global_symbols_size++] = (GlobalSym) { - .ident = asm_strdup(stmt->ident), - .loc = stmt->loc, - .ip = 0, - }; + header_builder_add_global_sym( + header_builder, asm_strdup(stmt->ident), stmt->loc, 0); pstmt_free(stmt); break; case PStmtTy_Extern: { @@ -121,18 +92,11 @@ int main(int argc, char** argv) pstmt_free(stmt); continue; } - size_t id = extern_symbols_size; + size_t id = extern_ids++; ident_resolver_define_extern( &resolver, asm_strdup(stmt->ident), stmt->loc, id); - if (extern_symbols_size + 1 > extern_symbols_capacity) { - extern_symbols_capacity *= 2; - extern_symbols = realloc(extern_symbols, - sizeof(ExternSym) * extern_symbols_capacity); - } - extern_symbols[extern_symbols_size++] = (ExternSym) { - .ident = asm_strdup(stmt->ident), - .id = id, - }; + header_builder_add_extern_sym( + header_builder, id, asm_strdup(stmt->ident)); pstmt_free(stmt); break; } @@ -171,11 +135,7 @@ int main(int argc, char** argv) } } - errors_occured &= parser_error_occured(&parser); - - size_t extern_refs_capacity = 8; - ExternRef* extern_refs = malloc(sizeof(ExternRef) * extern_refs_capacity); - size_t extern_refs_size = 0; + errors_occured |= parser_error_occured(&parser); size_t chunk_capacity = 64; uint16_t* chunk = malloc(sizeof(uint16_t) * chunk_capacity); @@ -199,16 +159,8 @@ int main(int argc, char** argv) if (chunk[chunk_i] == ex->surrogate) { found = true; - if (extern_refs_size + 1 > extern_refs_capacity) { - extern_refs_capacity *= 2; - extern_refs = realloc(extern_refs, - sizeof(ExternRef) * extern_refs_capacity); - } - extern_refs[extern_refs_size++] = (ExternRef) { - .id = ex->extern_id, - .ip = ip + chunk_i, - }; - + header_builder_add_extern_ref( + header_builder, ex->extern_id, ip + chunk_i); break; } } 
@@ -217,23 +169,11 @@ int main(int argc, char** argv) ip += size; } + uint16_t program_size = ip; - for (size_t i = 0; i < global_symbols_size; ++i) { - const IdentResol* res - = ident_resolver_resolve(&resolver, global_symbols[i].ident); - if (res == NULL) { - REPORTF_ERROR("undefined global '%s'", global_symbols[i].ident); - REPORTF_INFO("'%s' declared global here", global_symbols[i].ident); - reporter_print_loc(&rep, global_symbols[i].loc); - continue; - } else if (res->ty != IdentResolTy_Label) { - REPORTF_ERROR("'%s' cannot be declared global", res->ident); - reporter_print_loc(&rep, res->loc); - REPORTF_INFO("'%s' declared global here", global_symbols[i].ident); - reporter_print_loc(&rep, global_symbols[i].loc); - continue; - } - global_symbols[i].ip = res->ip; + res = header_builder_resolve_global_syms(header_builder, &resolver, &rep); + if (res != 0) { + errors_occured = true; } if (errors_occured) { @@ -242,29 +182,7 @@ int main(int argc, char** argv) goto leave_free_chunk; } - evaluator.second_pass = true; - - printf("globals:\n"); - printf("symbol address\n"); - printf("-----------------------\n"); - for (size_t i = 0; i < global_symbols_size; ++i) { - GlobalSym* sym = &global_symbols[i]; - printf("%-15s %d\n", sym->ident, sym->ip); - } - printf("\nexterns:\n"); - printf("id symbol\n"); - printf("--------------------\n"); - for (size_t i = 0; i < extern_symbols_size; ++i) { - ExternSym* sym = &extern_symbols[i]; - printf("%-3ld %s\n", sym->id, sym->ident); - } - printf("\nextern uses:\n"); - printf("id address\n"); - printf("-----------\n"); - for (size_t i = 0; i < extern_refs_size; ++i) { - ExternRef* ref = &extern_refs[i]; - printf("%3ld %04x\n", ref->id, ref->ip); - } + header_builder_print(header_builder); FILE* output_fp = fopen(args.output_file, "wb"); if (!output_fp) { @@ -275,7 +193,18 @@ int main(int argc, char** argv) goto leave_free_chunk; } - size_t total_bytes_written = 0; + size_t total_words_written; + res = header_builder_write( + 
header_builder, output_fp, &total_words_written, args.output_file); + if (res != 0) { + errors_occured = true; + fclose(output_fp); + res = -1; + goto leave_free_chunk; + } + + evaluator.second_pass = true; + for (size_t i = 0; i < lines_size; ++i) { use_labels(&resolver, lines[i]->labels); @@ -287,9 +216,10 @@ int main(int argc, char** argv) if (errors_occured) break; - size_t bytes_written = fwrite(chunk, sizeof(uint16_t), size, output_fp); - total_bytes_written += bytes_written; - if (bytes_written != size) { + size_t amount_written + = fwrite(chunk, sizeof(uint16_t), size, output_fp); + total_words_written += amount_written; + if (amount_written != size) { REPORTF_ERROR("could not write to output file '%s': %s", args.output_file, strerror(errno)); @@ -300,7 +230,7 @@ int main(int argc, char** argv) if (errors_occured) { fprintf( - stderr, "%ld bytes written. stopping...\n", total_bytes_written); + stderr, "%ld bytes written. stopping...\n", total_words_written); res = -1; goto leave_free_chunk; } @@ -311,13 +241,7 @@ leave_free_chunk: // leave_free_lines: for (size_t i = 0; i < lines_size; ++i) pline_free(lines[i]); - for (size_t i = 0; i < global_symbols_size; ++i) - free(global_symbols[i].ident); - free(global_symbols); - for (size_t i = 0; i < extern_symbols_size; ++i) - free(extern_symbols[i].ident); - free(extern_symbols); - free(extern_refs); + header_builder_free(header_builder); free(lines); free(input_text); operand_evaluator_destroy(&evaluator); @@ -441,7 +365,7 @@ static inline int include_file(IdentResolver* resolver, pstmt_free(stmt); } - errors_occured &= parser_error_occured(&parser); + errors_occured |= parser_error_occured(&parser); free(filepath); free(text); diff --git a/readme.txt b/readme.txt new file mode 100644 index 0000000..ea68d64 --- /dev/null +++ b/readme.txt @@ -0,0 +1,44 @@ + +vc3 + +- 16-bit instruction set +- virtual machine +- assembler + +little endian (because x86_64 also is) + +w word = 16 +b byte = 8 + +object file format: + 
+begin file + begin header + 5w identifier "vc3-object" + 1w header byte size + 1w global table entry count + for each global in global table + 1w symbol address + 1w symbol identifier byte size + *w symbol identifier data 2-byte aligned + + end + 1w extern table entry count + for each extern in extern table + 1w extern id + 1w symbol identifier byte size + *w symbol identifier data 2-byte aligned + end + 1w extern use table entry count + for each used extern in extern use table + 1w extern id + 1w address + end + 1w code byte size + end + begin code + *w code 2-byte aligned + end +end + +