emit object header

This commit is contained in:
sfja 2025-04-04 00:26:55 +02:00
parent 958c37f7b0
commit 0b9398f015
4 changed files with 354 additions and 113 deletions

248
asm/build_header.c Normal file
View File

@ -0,0 +1,248 @@
#include "build_header.h"
#include "report.h"
#include "resolve.h"
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
// A symbol declared global: its name, where it was declared, and the address
// it resolves to.
typedef struct {
// Symbol name; released by header_builder_free().
char* ident;
// Source location of the declaration, printed when resolution fails.
Loc loc;
// Address of the symbol; overwritten by header_builder_resolve_global_syms()
// once the identifier resolves to a label.
uint16_t ip;
} GlobalSym;
// An external symbol: the numeric id it is referred to by, and its name.
typedef struct {
// Id written to the extern table and matched by ExternRef.id.
size_t id;
// Symbol name; released by header_builder_free().
char* ident;
} ExternSym;
// One use of an external symbol inside the program.
typedef struct {
// Id of the extern being used (see ExternSym.id).
size_t id;
// Address recorded for this use; written as-is to the extern use table.
uint16_t ip;
} ExternRef;
// Declares the three struct members that make up a growable array of T:
// NAME##_capacity (allocated slots), NAME (the storage) and NAME##_size
// (slots in use). VEC_CTOR_INIT and VEC_PUSH rely on exactly these names.
#define DEFINE_VEC_FIELDS(T, NAME) \
size_t NAME##_capacity; \
T* NAME; \
size_t NAME##_size;
// Accumulates everything that goes into the object file header.
struct HeaderBuilder {
// Symbols declared global.
DEFINE_VEC_FIELDS(GlobalSym, global_syms)
// Symbols declared extern.
DEFINE_VEC_FIELDS(ExternSym, extern_syms)
// Recorded uses of extern symbols.
DEFINE_VEC_FIELDS(ExternRef, extern_refs)
// Program size; header_builder_write() emits it multiplied by two.
// NOTE(review): initialised to 0 and never assigned elsewhere in this file,
// and the public header exposes no setter -- confirm how it is meant to be
// filled in.
size_t program_size;
};
// Designated initialisers for the fields declared by DEFINE_VEC_FIELDS:
// allocates room for CAPACITY elements of T and starts with a size of zero.
// The malloc result is stored unchecked.
#define VEC_CTOR_INIT(T, NAME, CAPACITY) \
.NAME##_capacity = (CAPACITY), .NAME = malloc(sizeof(T) * (CAPACITY)), \
.NAME##_size = 0
HeaderBuilder* header_builder_new(void)
{
const size_t global_syms_capacity = 8;
const size_t extern_syms_capacity = 8;
const size_t extern_refs_capacity = 8;
HeaderBuilder* builder = malloc(sizeof(HeaderBuilder));
*builder = (HeaderBuilder) {
VEC_CTOR_INIT(GlobalSym, global_syms, 8),
VEC_CTOR_INIT(ExternSym, extern_syms, 8),
VEC_CTOR_INIT(ExternRef, extern_refs, 8),
.program_size = 0,
};
return builder;
}
// Releases a builder created by header_builder_new(): every identifier string
// held by the global and extern tables, the three tables, and the builder
// itself.
void header_builder_free(HeaderBuilder* builder)
{
    size_t idx = 0;
    while (idx < builder->global_syms_size) {
        GlobalSym* global = &builder->global_syms[idx];
        free(global->ident);
        idx += 1;
    }
    free(builder->global_syms);

    idx = 0;
    while (idx < builder->extern_syms_size) {
        ExternSym* ext = &builder->extern_syms[idx];
        free(ext->ident);
        idx += 1;
    }
    free(builder->extern_syms);

    // Extern refs own no heap data of their own.
    free(builder->extern_refs);
    free(builder);
}
// Appends `(T) { __VA_ARGS__ }` to the growable array NAME inside *STRUCT
// (fields as declared by DEFINE_VEC_FIELDS), doubling the storage when full.
//
// Expands to a single statement, so it is safe under an unbraced if/else;
// terminate the invocation with a semicolon. A capacity of zero grows to one.
// If the storage cannot be grown the process is aborted, because the callers
// have no way to report failure.
#define VEC_PUSH(STRUCT, T, NAME, ...)                                         \
    do {                                                                       \
        if ((STRUCT)->NAME##_size + 1 > (STRUCT)->NAME##_capacity) {           \
            size_t vec_push_capacity = (STRUCT)->NAME##_capacity == 0          \
                ? 1                                                            \
                : (STRUCT)->NAME##_capacity * 2;                               \
            T* vec_push_data                                                   \
                = realloc((STRUCT)->NAME, sizeof(T) * vec_push_capacity);      \
            if (vec_push_data == NULL) {                                       \
                fprintf(stderr, "out of memory\n");                            \
                abort();                                                       \
            }                                                                  \
            (STRUCT)->NAME = vec_push_data;                                    \
            (STRUCT)->NAME##_capacity = vec_push_capacity;                     \
        }                                                                      \
        (STRUCT)->NAME[(STRUCT)->NAME##_size++] = (T) { __VA_ARGS__ };         \
    } while (0)
// Records a global symbol. `ident` is stored as-is and later released by
// header_builder_free(); `ip` can be overwritten afterwards by
// header_builder_resolve_global_syms().
void header_builder_add_global_sym(
    HeaderBuilder* builder, char* ident, Loc loc, uint16_t ip)
{
    VEC_PUSH(
        builder, GlobalSym, global_syms, .ident = ident, .loc = loc, .ip = ip);
}
// Records an extern symbol under the given id. `ident` is stored as-is and
// later released by header_builder_free().
void header_builder_add_extern_sym(
    HeaderBuilder* builder, size_t id, char* ident)
{
    VEC_PUSH(builder, ExternSym, extern_syms, .id = id, .ident = ident);
}
// Records that the extern with the given id is used at address `ip`.
void header_builder_add_extern_ref(
    HeaderBuilder* builder, size_t id, uint16_t ip)
{
    VEC_PUSH(builder, ExternRef, extern_refs, .id = id, .ip = ip);
}
// Looks up every recorded global symbol in `resolver` and stores the address
// of the label it names.
//
// A symbol that does not resolve, or resolves to something other than a
// label, is reported together with the location of its global declaration
// and processing continues with the next symbol.
//
// Returns 0 when every symbol resolved to a label, -1 otherwise.
int header_builder_resolve_global_syms(
    HeaderBuilder* builder, IdentResolver* resolver, Reporter* rep)
{
    int status = 0;

    for (size_t idx = 0; idx < builder->global_syms_size; ++idx) {
        GlobalSym* sym = &builder->global_syms[idx];
        const IdentResol* resol = ident_resolver_resolve(resolver, sym->ident);

        if (resol != NULL && resol->ty == IdentResolTy_Label) {
            sym->ip = resol->ip;
            continue;
        }

        if (resol == NULL) {
            REPORTF_ERROR("undefined global '%s'", sym->ident);
        } else {
            REPORTF_ERROR("'%s' cannot be declared global", resol->ident);
            reporter_print_loc(rep, resol->loc);
        }
        // Both failure kinds point back at the global declaration.
        REPORTF_INFO("'%s' declared global here", sym->ident);
        reporter_print_loc(rep, sym->loc);
        status = -1;
    }

    return status;
}
// Output stream paired with its file name, so write errors can name the file.
// Initialised positionally in header_builder_write(); keep the field order.
typedef struct {
// Stream the words are written to.
FILE* fp;
// Name used in the error message when a write fails.
const char* filename;
} Writer;
// Writes `values_size` 16-bit words from `values` to the writer's stream.
//
// Returns the number of words written. On a short write an error naming the
// output file is reported and 0 is returned. Note that a request for zero
// words also returns 0.
static inline size_t write_words(
    Writer* writer, const uint16_t* values, size_t values_size)
{
    const size_t written
        = fwrite(values, sizeof(*values), values_size, writer->fp);
    if (written == values_size)
        return written;

    REPORTF_ERROR("could not write to output file '%s': %s",
        writer->filename,
        strerror(errno));
    return 0;
}
// Writes a single 16-bit word; returns 1 on success and 0 on failure
// (see write_words).
static inline size_t write_word(Writer* writer, uint16_t value)
{
    const uint16_t word = value;
    return write_words(writer, &word, 1);
}
// Writes the object file header to `fp` as a sequence of 16-bit words in
// host byte order:
//
//   5 words  the characters "vc3-object"
//   1 word   header size in bytes
//   1 word   number of globals, then per global:
//              address, identifier length in bytes, identifier data
//   1 word   number of externs, then per extern:
//              id, identifier length in bytes, identifier data
//   1 word   number of extern uses, then per use: id, address
//   1 word   builder->program_size * 2
//
// Identifier data is padded to a whole number of words; for odd lengths the
// pad byte is the string's NUL terminator.
//
// `*total_written` is reset to 0 and then incremented by the number of words
// written. `filename` is only used in error messages.
//
// Returns 0 on success, or -1 as soon as a write fails (the failure has then
// already been reported by write_words).
//
// NOTE(review): counts, ids and sizes are narrowed to 16 bits without a
// range check.
int header_builder_write(const HeaderBuilder* builder,
    FILE* fp,
    size_t* total_written,
    const char* filename)
{
    typedef uint16_t W;

    // Fixed part, in words: 5 for the magic, then header size, the three
    // table counts and the code size.
    size_t header_words = 5 + 1 + 1 + 1 + 1 + 1;
    // Each symbol entry is two words plus its identifier rounded up to whole
    // words -- the same (len + 1) / 2 that is actually written below.
    for (size_t i = 0; i < builder->global_syms_size; ++i)
        header_words += 1 + 1 + (strlen(builder->global_syms[i].ident) + 1) / 2;
    for (size_t i = 0; i < builder->extern_syms_size; ++i)
        header_words += 1 + 1 + (strlen(builder->extern_syms[i].ident) + 1) / 2;
    header_words += (1 + 1) * builder->extern_refs_size;
    const size_t header_size = header_words * 2;

// write_words() returns 0 both on failure and for an empty request, so an
// empty request is skipped rather than mistaken for an error.
#define TRY_WRITE_WORDS(VALUES, VALUES_SIZE)                                   \
    do {                                                                       \
        const size_t try_count = (VALUES_SIZE);                                \
        if (try_count != 0) {                                                  \
            const size_t try_res = write_words(w, (VALUES), try_count);        \
            if (try_res == 0)                                                  \
                return -1;                                                     \
            *total_written += try_res;                                         \
        }                                                                      \
    } while (0)
#define TRY_WRITE_WORD(VALUE)                                                  \
    do {                                                                       \
        const size_t try_res = write_word(w, (VALUE));                         \
        if (try_res == 0)                                                      \
            return -1;                                                         \
        *total_written += try_res;                                             \
    } while (0)

    Writer writer = { fp, filename };
    Writer* w = &writer;
    *total_written = 0;

    // Copy the 10 magic characters into properly aligned words instead of
    // viewing the string literal through a uint16_t pointer.
    W magic[5];
    memcpy(magic, "vc3-object", sizeof(magic));
    TRY_WRITE_WORDS(magic, 5);
    TRY_WRITE_WORD((W)header_size);

    TRY_WRITE_WORD((W)builder->global_syms_size);
    for (size_t i = 0; i < builder->global_syms_size; ++i) {
        const GlobalSym* sym = &builder->global_syms[i];
        TRY_WRITE_WORD(sym->ip);
        size_t ident_len = strlen(sym->ident);
        TRY_WRITE_WORD((W)ident_len);
        // This is safe because of null termination.
        TRY_WRITE_WORDS((const W*)sym->ident, (ident_len + 1) / 2);
    }

    TRY_WRITE_WORD((W)builder->extern_syms_size);
    for (size_t i = 0; i < builder->extern_syms_size; ++i) {
        const ExternSym* sym = &builder->extern_syms[i];
        TRY_WRITE_WORD((W)sym->id);
        size_t ident_len = strlen(sym->ident);
        TRY_WRITE_WORD((W)ident_len);
        // This is safe because of null termination.
        TRY_WRITE_WORDS((const W*)sym->ident, (ident_len + 1) / 2);
    }

    TRY_WRITE_WORD((W)builder->extern_refs_size);
    for (size_t i = 0; i < builder->extern_refs_size; ++i) {
        const ExternRef* ref = &builder->extern_refs[i];
        TRY_WRITE_WORD((W)ref->id);
        TRY_WRITE_WORD(ref->ip);
    }

    TRY_WRITE_WORD((uint16_t)(builder->program_size * 2));

#undef TRY_WRITE_WORDS
#undef TRY_WRITE_WORD
    return 0;
}
// Prints the three tables held by the builder (globals, externs, extern
// uses) to stdout in a human-readable form.
void header_builder_print(const HeaderBuilder* builder)
{
    printf("globals:\n");
    printf("address symbol\n");
    printf("-----------------------\n");
    for (size_t i = 0; i < builder->global_syms_size; ++i) {
        const GlobalSym* sym = &builder->global_syms[i];
        printf("%04x, %s\n", (unsigned)sym->ip, sym->ident);
    }

    printf("\nexterns:\n");
    printf("id symbol\n");
    printf("--------------------\n");
    for (size_t i = 0; i < builder->extern_syms_size; ++i) {
        const ExternSym* sym = &builder->extern_syms[i];
        // Ids are size_t, so they need %zu rather than %ld.
        printf("%-3zu %s\n", sym->id, sym->ident);
    }

    printf("\nextern uses:\n");
    printf("id address\n");
    printf("-----------\n");
    for (size_t i = 0; i < builder->extern_refs_size; ++i) {
        const ExternRef* ref = &builder->extern_refs[i];
        printf("%3zu %04x\n", ref->id, (unsigned)ref->ip);
    }
}

25
asm/build_header.h Normal file
View File

@ -0,0 +1,25 @@
#pragma once
#include "report.h"
#include "resolve.h"
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>
// Opaque accumulator for the contents of an object file header.
typedef struct HeaderBuilder HeaderBuilder;
// Creates a builder with empty tables. Release it with header_builder_free().
HeaderBuilder* header_builder_new(void);
// Frees the builder, its tables, and every identifier string that was handed
// to header_builder_add_global_sym() / header_builder_add_extern_sym().
void header_builder_free(HeaderBuilder* builder);
// Records a global symbol. The builder keeps `ident` and frees it in
// header_builder_free(). `ip` may later be overwritten by
// header_builder_resolve_global_syms().
void header_builder_add_global_sym(
HeaderBuilder* builder, char* ident, Loc loc, uint16_t ip);
// Records an extern symbol under `id`. The builder keeps `ident` and frees it
// in header_builder_free().
void header_builder_add_extern_sym(
HeaderBuilder* builder, size_t id, char* ident);
// Records that extern `id` is used at address `ip`.
void header_builder_add_extern_ref(
HeaderBuilder* builder, size_t id, uint16_t ip);
// Resolves every recorded global through `resolver` and stores its address.
// Globals that are undefined or not labels are reported. Returns 0 when all
// globals resolved, -1 otherwise.
int header_builder_resolve_global_syms(
HeaderBuilder* builder, IdentResolver* resolver, Reporter* rep);
// Writes the header to `fp` as 16-bit words. `*total_written` is reset and
// then receives the number of words written; `filename` is used in error
// messages only. Returns 0 on success, -1 when a write fails.
int header_builder_write(const HeaderBuilder* builder,
FILE* fp,
size_t* total_written,
const char* filename);
// Prints the recorded globals, externs and extern uses to stdout.
void header_builder_print(const HeaderBuilder* builder);

View File

@ -1,7 +1,8 @@
#include "asm/report.h"
#include "assemble.h"
#include "build_header.h"
#include "eval.h"
#include "parse.h"
#include "report.h"
#include "resolve.h"
#include "str.h"
#include <assert.h>
@ -18,22 +19,6 @@ typedef struct {
const char* output_file;
} Args;
typedef struct {
char* ident;
Loc loc;
uint16_t ip;
} GlobalSym;
typedef struct {
size_t id;
char* ident;
} ExternSym;
typedef struct {
size_t id;
uint16_t ip;
} ExternRef;
static inline Args parse_args(int argc, char** argv);
static inline char* read_text_file(const char* filename);
static inline int include_file(IdentResolver* resolver,
@ -71,19 +56,13 @@ int main(int argc, char** argv)
OperandEvaluator evaluator;
operand_evaluator_construct(&evaluator, &resolver, &rep);
HeaderBuilder* header_builder = header_builder_new();
size_t lines_capacity = 1024;
PLine** lines = malloc(sizeof(PLine*) * lines_capacity);
size_t lines_size = 0;
size_t global_symbols_capacity = 8;
GlobalSym* global_symbols
= malloc(sizeof(GlobalSym) * global_symbols_capacity);
size_t global_symbols_size = 0;
size_t extern_symbols_capacity = 8;
ExternSym* extern_symbols
= malloc(sizeof(ExternSym) * extern_symbols_capacity);
size_t extern_symbols_size = 0;
size_t extern_ids = 0;
while (!parser_done(&parser)) {
PStmt* stmt = parser_next_stmt(&parser);
@ -101,16 +80,8 @@ int main(int argc, char** argv)
free(stmt);
break;
case PStmtTy_Global:
if (global_symbols_size + 1 > global_symbols_capacity) {
global_symbols_capacity *= 2;
global_symbols = realloc(global_symbols,
sizeof(GlobalSym) * global_symbols_capacity);
}
global_symbols[global_symbols_size++] = (GlobalSym) {
.ident = asm_strdup(stmt->ident),
.loc = stmt->loc,
.ip = 0,
};
header_builder_add_global_sym(
header_builder, asm_strdup(stmt->ident), stmt->loc, 0);
pstmt_free(stmt);
break;
case PStmtTy_Extern: {
@ -121,18 +92,11 @@ int main(int argc, char** argv)
pstmt_free(stmt);
continue;
}
size_t id = extern_symbols_size;
size_t id = extern_ids++;
ident_resolver_define_extern(
&resolver, asm_strdup(stmt->ident), stmt->loc, id);
if (extern_symbols_size + 1 > extern_symbols_capacity) {
extern_symbols_capacity *= 2;
extern_symbols = realloc(extern_symbols,
sizeof(ExternSym) * extern_symbols_capacity);
}
extern_symbols[extern_symbols_size++] = (ExternSym) {
.ident = asm_strdup(stmt->ident),
.id = id,
};
header_builder_add_extern_sym(
header_builder, id, asm_strdup(stmt->ident));
pstmt_free(stmt);
break;
}
@ -171,11 +135,7 @@ int main(int argc, char** argv)
}
}
errors_occured &= parser_error_occured(&parser);
size_t extern_refs_capacity = 8;
ExternRef* extern_refs = malloc(sizeof(ExternRef) * extern_refs_capacity);
size_t extern_refs_size = 0;
errors_occured |= parser_error_occured(&parser);
size_t chunk_capacity = 64;
uint16_t* chunk = malloc(sizeof(uint16_t) * chunk_capacity);
@ -199,16 +159,8 @@ int main(int argc, char** argv)
if (chunk[chunk_i] == ex->surrogate) {
found = true;
if (extern_refs_size + 1 > extern_refs_capacity) {
extern_refs_capacity *= 2;
extern_refs = realloc(extern_refs,
sizeof(ExternRef) * extern_refs_capacity);
}
extern_refs[extern_refs_size++] = (ExternRef) {
.id = ex->extern_id,
.ip = ip + chunk_i,
};
header_builder_add_extern_ref(
header_builder, ex->extern_id, ip + chunk_i);
break;
}
}
@ -217,23 +169,11 @@ int main(int argc, char** argv)
ip += size;
}
uint16_t program_size = ip;
for (size_t i = 0; i < global_symbols_size; ++i) {
const IdentResol* res
= ident_resolver_resolve(&resolver, global_symbols[i].ident);
if (res == NULL) {
REPORTF_ERROR("undefined global '%s'", global_symbols[i].ident);
REPORTF_INFO("'%s' declared global here", global_symbols[i].ident);
reporter_print_loc(&rep, global_symbols[i].loc);
continue;
} else if (res->ty != IdentResolTy_Label) {
REPORTF_ERROR("'%s' cannot be declared global", res->ident);
reporter_print_loc(&rep, res->loc);
REPORTF_INFO("'%s' declared global here", global_symbols[i].ident);
reporter_print_loc(&rep, global_symbols[i].loc);
continue;
}
global_symbols[i].ip = res->ip;
res = header_builder_resolve_global_syms(header_builder, &resolver, &rep);
if (res != 0) {
errors_occured = true;
}
if (errors_occured) {
@ -242,29 +182,7 @@ int main(int argc, char** argv)
goto leave_free_chunk;
}
evaluator.second_pass = true;
printf("globals:\n");
printf("symbol address\n");
printf("-----------------------\n");
for (size_t i = 0; i < global_symbols_size; ++i) {
GlobalSym* sym = &global_symbols[i];
printf("%-15s %d\n", sym->ident, sym->ip);
}
printf("\nexterns:\n");
printf("id symbol\n");
printf("--------------------\n");
for (size_t i = 0; i < extern_symbols_size; ++i) {
ExternSym* sym = &extern_symbols[i];
printf("%-3ld %s\n", sym->id, sym->ident);
}
printf("\nextern uses:\n");
printf("id address\n");
printf("-----------\n");
for (size_t i = 0; i < extern_refs_size; ++i) {
ExternRef* ref = &extern_refs[i];
printf("%3ld %04x\n", ref->id, ref->ip);
}
header_builder_print(header_builder);
FILE* output_fp = fopen(args.output_file, "wb");
if (!output_fp) {
@ -275,7 +193,18 @@ int main(int argc, char** argv)
goto leave_free_chunk;
}
size_t total_bytes_written = 0;
size_t total_words_written;
res = header_builder_write(
header_builder, output_fp, &total_words_written, args.output_file);
if (res != 0) {
errors_occured = true;
fclose(output_fp);
res = -1;
goto leave_free_chunk;
}
evaluator.second_pass = true;
for (size_t i = 0; i < lines_size; ++i) {
use_labels(&resolver, lines[i]->labels);
@ -287,9 +216,10 @@ int main(int argc, char** argv)
if (errors_occured)
break;
size_t bytes_written = fwrite(chunk, sizeof(uint16_t), size, output_fp);
total_bytes_written += bytes_written;
if (bytes_written != size) {
size_t amount_written
= fwrite(chunk, sizeof(uint16_t), size, output_fp);
total_words_written += amount_written;
if (amount_written != size) {
REPORTF_ERROR("could not write to output file '%s': %s",
args.output_file,
strerror(errno));
@ -300,7 +230,7 @@ int main(int argc, char** argv)
if (errors_occured) {
fprintf(
stderr, "%ld bytes written. stopping...\n", total_bytes_written);
stderr, "%ld bytes written. stopping...\n", total_words_written);
res = -1;
goto leave_free_chunk;
}
@ -311,13 +241,7 @@ leave_free_chunk:
// leave_free_lines:
for (size_t i = 0; i < lines_size; ++i)
pline_free(lines[i]);
for (size_t i = 0; i < global_symbols_size; ++i)
free(global_symbols[i].ident);
free(global_symbols);
for (size_t i = 0; i < extern_symbols_size; ++i)
free(extern_symbols[i].ident);
free(extern_symbols);
free(extern_refs);
header_builder_free(header_builder);
free(lines);
free(input_text);
operand_evaluator_destroy(&evaluator);
@ -441,7 +365,7 @@ static inline int include_file(IdentResolver* resolver,
pstmt_free(stmt);
}
errors_occured &= parser_error_occured(&parser);
errors_occured |= parser_error_occured(&parser);
free(filepath);
free(text);

44
readme.txt Normal file
View File

@ -0,0 +1,44 @@
vc3
- 16-bit instruction set
- virtual machine
- assembler
little endian (because x86_64 also is)
w word = 16 bits
b byte = 8 bits
object file format:
begin file
begin header
5w identifier "vc3-object"
1w header byte size
1w global table entry count
for each global in global table
1w symbol address
1w symbol identifier byte size
*w symbol identifier data 2-byte aligned
end
1w extern table entry count
for each extern in extern table
1w extern id
1w symbol identifier byte size
*w symbol identifier data 2-byte aligned
end
1w extern use table entry count
for each used extern in extern use table
1w extern id
1w address
end
1w code size
end
begin code
*w code 2-byte aligned
end
end