From 89f20f8a35c374b1075c01fb73351fafbf809a58 Mon Sep 17 00:00:00 2001
From: sfja <sfja2004@gmail.com>
Date: Thu, 3 Apr 2025 01:44:31 +0200
Subject: [PATCH] external works

---
 asm/eval.c    |  79 +++++++++++++++++++++++++-
 asm/eval.h    |  18 ++++++
 asm/main.c    | 151 ++++++++++++++++++++++++++++++++++++++------------
 asm/resolve.c |  15 +++++
 asm/resolve.h |   4 ++
 kern/main.asm |   1 +
 6 files changed, 231 insertions(+), 37 deletions(-)

diff --git a/asm/eval.c b/asm/eval.c
index 696f55b..2ec3a1c 100644
--- a/asm/eval.c
+++ b/asm/eval.c
@@ -7,6 +7,110 @@
 #include <stdio.h>
 #include <stdlib.h>
 
+// Initial element count of both evaluator tables. Must be a power of two,
+// see ensure_slot().
+static const size_t initial_capacity = 64;
+
+// Returns a buffer that can hold element number `size` (zero-based) of
+// `elem_size` bytes, growing `items` when it is full.
+//
+// OperandEvaluator stores no capacity, so it is implied by `size`: a table
+// starts with `initial_capacity` elements and is doubled every time `size`
+// reaches a power of two at or above that. Exits when out of memory.
+static void* ensure_slot(void* items, size_t size, size_t elem_size)
+{
+    if (size < initial_capacity || (size & (size - 1)) != 0)
+        return items;
+    void* grown = realloc(items, elem_size * size * 2);
+    if (grown == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(1);
+    }
+    return grown;
+}
+
+// Initialises `evaluator` for a first pass (`second_pass` is false) with
+// empty immediate and external tables. Pair with operand_evaluator_destroy().
+// Exits when the tables cannot be allocated.
+void operand_evaluator_construct(
+    OperandEvaluator* evaluator, IdentResolver* re, Reporter* rep)
+{
+    *evaluator = (OperandEvaluator) {
+        .re = re,
+        .rep = rep,
+        .second_pass = false,
+        .used_imms = malloc(sizeof(uint16_t) * initial_capacity),
+        .used_imms_size = 0,
+        .externals = malloc(sizeof(SurrogateExternal) * initial_capacity),
+        .externals_size = 0,
+    };
+    if (evaluator->used_imms == NULL || evaluator->externals == NULL) {
+        fprintf(stderr, "out of memory\n");
+        exit(1);
+    }
+}
+
+// Frees the two tables owned by `evaluator`; the resolver and reporter are
+// left alone.
+void operand_evaluator_destroy(OperandEvaluator* evaluator)
+{
+    free(evaluator->used_imms);
+    free(evaluator->externals);
+}
+
+// Forgets all recorded immediates and externals; the tables stay allocated.
+void operand_evaluator_reset_externals(OperandEvaluator* evaluator)
+{
+    evaluator->used_imms_size = 0;
+    evaluator->externals_size = 0;
+}
+
+// Records `imm` as taken, so operand_evaluator_use_external() will not hand
+// it out as a surrogate. Values already recorded are ignored.
+void operand_evaluator_use_imm(OperandEvaluator* evaluator, uint16_t imm)
+{
+    for (size_t i = 0; i < evaluator->used_imms_size; ++i)
+        if (evaluator->used_imms[i] == imm)
+            return;
+    evaluator->used_imms = ensure_slot(
+        evaluator->used_imms, evaluator->used_imms_size, sizeof(uint16_t));
+    evaluator->used_imms[evaluator->used_imms_size++] = imm;
+}
+
+// Registers a use of the external `resol` and returns the surrogate immediate
+// that stands in for it.
+//
+// Surrogates count down from 0xffff: the search starts just below the most
+// recently registered one and skips every value recorded through
+// operand_evaluator_use_imm(). Returns 0 when the search runs out.
+uint16_t operand_evaluator_use_external(
+    OperandEvaluator* evaluator, const IdentResol* resol)
+{
+    uint16_t surrogate = 0xffff;
+    if (evaluator->externals_size > 0) {
+        surrogate
+            = evaluator->externals[evaluator->externals_size - 1].surrogate - 1;
+    }
+    while (surrogate > 0) {
+        bool used = false;
+        for (size_t i = 0; i < evaluator->used_imms_size; ++i)
+            if (evaluator->used_imms[i] == surrogate)
+                used = true;
+        if (!used)
+            break;
+        // Advance the candidate itself; the loop cannot end otherwise.
+        surrogate -= 1;
+    }
+    evaluator->externals = ensure_slot(evaluator->externals,
+        evaluator->externals_size, sizeof(SurrogateExternal));
+    evaluator->externals[evaluator->externals_size++] = (SurrogateExternal) {
+        .resol = resol,
+        .extern_id = resol->extern_id,
+        .surrogate = surrogate,
+    };
+    return surrogate;
+}
+
 static inline uint16_t eval_poperandty_unary(POperandTy ty, uint16_t operand)
 {
     switch (ty) {
@@ -87,7 +151,11 @@ EvaledOperand eval_operand_to_imm(
                     return (EvaledOperand) { .ty = EoTy_Imm, .imm = re->ip };
                 case IdentResolTy_Const:
                     return (EvaledOperand) { .ty = EoTy_Imm, .imm = re->value };
-                    break;
+                case IdentResolTy_Extern:
+                    return (EvaledOperand) {
+                        .ty = EoTy_Imm,
+                        .imm = operand_evaluator_use_external(evaluator, re),
+                    };
             }
             fprintf(stderr, "unreachable\n");
             exit(1);
@@ -299,8 +367,13 @@ EvaledOperand eval_operand(OperandEvaluator* evaluator, POperand* operand)
         case PoTy_Sub:
         case PoTy_Mul:
         case PoTy_Div:
-        case PoTy_Mod:
-            return eval_operand_to_imm(evaluator, operand);
+        case PoTy_Mod: {
+            EvaledOperand evaled = eval_operand_to_imm(evaluator, operand);
+            if (evaled.ty == EoTy_Imm) {
+                operand_evaluator_use_imm(evaluator, evaled.imm);
+            }
+            return evaled;
+        }
     }
     fprintf(stderr, "unreachable\n");
     exit(1);
diff --git a/asm/eval.h b/asm/eval.h
index c7e42ec..21fdf23 100644
--- a/asm/eval.h
+++ b/asm/eval.h
@@ -26,12 +26,42 @@ typedef struct {
     uint16_t offset;
 } EvaledOperand;
 
+// One use of an external symbol, recorded while operands are evaluated.
+typedef struct {
+    // Resolution of the identifier that was referenced.
+    const IdentResol* resol;
+    // Copy of `resol->extern_id`.
+    size_t extern_id;
+    // Placeholder immediate produced for this use.
+    uint16_t surrogate;
+} SurrogateExternal;
+
 typedef struct {
     IdentResolver* re;
     Reporter* rep;
     bool second_pass;
+
+    // Immediates already taken; surrogates are chosen to avoid them.
+    uint16_t* used_imms;
+    size_t used_imms_size;
+
+    // Externals registered since the last reset, in order of use.
+    SurrogateExternal* externals;
+    size_t externals_size;
 } OperandEvaluator;
 
+// Sets up / tears down the evaluator's tables.
+void operand_evaluator_construct(
+    OperandEvaluator* evaluator, IdentResolver* re, Reporter* rep);
+void operand_evaluator_destroy(OperandEvaluator* evaluator);
+// Empties the immediate and external tables.
+void operand_evaluator_reset_externals(OperandEvaluator* evaluator);
+// Marks `imm` as taken so that no surrogate is allocated on top of it.
+void operand_evaluator_use_imm(OperandEvaluator* evaluator, uint16_t imm);
+// Registers a use of the external `resol`; returns its surrogate immediate.
+uint16_t operand_evaluator_use_external(
+    OperandEvaluator* evaluator, const IdentResol* resol);
+
 EvaledOperand eval_operand_to_imm(
     OperandEvaluator* evaluator, POperand* operand);
 EvaledOperand eval_operand_indirection_expr(
diff --git a/asm/main.c b/asm/main.c
index 74f2b01..533cb67 100644
--- a/asm/main.c
+++ b/asm/main.c
@@ -24,12 +24,29 @@ typedef struct {
     uint16_t ip;
 } GlobalSym;
 
+// An `extern` declaration: the id handed to the resolver and an owned copy
+// of the identifier.
+typedef struct ExternSym {
+    size_t id;
+    char* ident;
+} ExternSym;
+
+// One place in the output where the external with `id` is used; `ip` is the
+// address of the word holding its surrogate value.
+typedef struct ExternRef {
+    size_t id;
+    uint16_t ip;
+} ExternRef;
+
 static inline Args parse_args(int argc, char** argv);
 static inline char* read_text_file(const char* filename);
 static inline int include_file(IdentResolver* resolver,
     OperandEvaluator* evaluator,
     const char* origin,
     const char* filename);
+// Prints the redefinition error for `stmt` and the location of `existing`.
+static inline void report_redefinition(
+    const IdentResol* existing, PStmt* stmt, Reporter* rep);
 static inline int define_labels(
     IdentResolver* resolver, PLabel* label, uint16_t asm_ip, Reporter* rep);
 static inline void use_labels(IdentResolver* resolver, PLabel* label);
@@ -56,11 +68,8 @@ int main(int argc, char** argv)
     IdentResolver resolver;
     ident_resolver_construct(&resolver);
 
-    OperandEvaluator evaluator = {
-        .re = &resolver,
-        .rep = &rep,
-        .second_pass = false,
-    };
+    OperandEvaluator evaluator;
+    operand_evaluator_construct(&evaluator, &resolver, &rep);
 
     size_t lines_capacity = 1024;
     PLine** lines = malloc(sizeof(PLine*) * lines_capacity);
@@ -71,6 +80,11 @@ int main(int argc, char** argv)
         = malloc(sizeof(GlobalSym) * global_symbols_capacity);
     size_t global_symbols_size = 0;
 
+    size_t extern_symbols_capacity = 8;
+    ExternSym* extern_symbols
+        = malloc(sizeof(ExternSym) * extern_symbols_capacity);
+    size_t extern_symbols_size = 0;
+
     while (!parser_done(&parser)) {
         PStmt* stmt = parser_next_stmt(&parser);
         if (!stmt) {
@@ -99,25 +113,39 @@ int main(int argc, char** argv)
                 };
                 pstmt_free(stmt);
                 break;
-            case PStmtTy_Extern:
+            case PStmtTy_Extern: {
+                // An extern may not reuse an identifier that already resolves.
+                const IdentResol* existing
+                    = ident_resolver_resolve(&resolver, stmt->ident);
+                if (existing != NULL) {
+                    report_redefinition(existing, stmt, &rep);
+                    errors_occured = true;
+                    pstmt_free(stmt);
+                    continue;
+                }
+                // The id is the symbol's index in `extern_symbols`.
+                size_t id = extern_symbols_size;
+                ident_resolver_define_extern(
+                    &resolver, asm_strdup(stmt->ident), stmt->loc, id);
+                if (extern_symbols_size + 1 > extern_symbols_capacity) {
+                    extern_symbols_capacity *= 2;
+                    extern_symbols = realloc(extern_symbols,
+                        sizeof(ExternSym) * extern_symbols_capacity);
+                }
+                extern_symbols[extern_symbols_size++] = (ExternSym) {
+                    .ident = asm_strdup(stmt->ident),
+                    .id = id,
+                };
                 pstmt_free(stmt);
                 break;
+            }
             case PStmtTy_Const: {
                 const IdentResol* existing
                     = ident_resolver_resolve(&resolver, stmt->ident);
                 if (existing != NULL) {
-                    REPORTF_ERROR(
-                        "redefinition of identifier '%s'", stmt->ident);
-                    reporter_print_loc(&rep, stmt->loc);
-                    const char* filename = rep.filename;
-                    if (existing->ty == IdentResolTy_Const) {
-                        filename = existing->src_filename;
-                    }
-                    REPORTF_INFO(
-                        "previous definition of '%s' here", existing->ident);
-                    reporter_print_loc(&rep, existing->loc);
-                    rep.filename = filename;
-                    errors_occured = true;
+                    report_redefinition(existing, stmt, &rep);
+                    // The helper only prints; the failure is flagged here.
+                    errors_occured = true;
                     pstmt_free(stmt);
                     continue;
                 }
@@ -150,17 +173,53 @@ int main(int argc, char** argv)
 
-    errors_occured &= parser_error_occured(&parser);
+    // Accumulate: errors flagged while parsing above must survive.
+    errors_occured |= parser_error_occured(&parser);
 
+    // Uses of external symbols found while the lines are assembled below.
+    size_t extern_refs_capacity = 8;
+    ExternRef* extern_refs = malloc(sizeof(ExternRef) * extern_refs_capacity);
+    size_t extern_refs_size = 0;
+
     size_t chunk_capacity = 64;
     uint16_t* chunk = malloc(sizeof(uint16_t) * chunk_capacity);
 
     uint16_t ip = 0;
     for (size_t i = 0; i < lines_size; ++i) {
+        operand_evaluator_reset_externals(&evaluator);
+
         int res = define_labels(&resolver, lines[i]->labels, ip, &rep);
         if (res != 0)
             errors_occured = true;
+
         uint16_t size = pline_assemble(&evaluator, chunk, lines[i], &rep);
         if (size == 0)
             errors_occured = true;
+
+        // Record where each external registered by this line ended up: the
+        // first word after word 0 that equals its surrogate.
+        // A line that failed to assemble (size 0) has no words to search.
+        size_t line_externals = size != 0 ? evaluator.externals_size : 0;
+        for (size_t ex_i = 0; ex_i < line_externals; ++ex_i) {
+            const SurrogateExternal* ex = &evaluator.externals[ex_i];
+            bool found = false;
+            for (uint16_t chunk_i = 1; chunk_i < size; ++chunk_i) {
+                if (chunk[chunk_i] != ex->surrogate)
+                    continue;
+                found = true;
+
+                if (extern_refs_size + 1 > extern_refs_capacity) {
+                    extern_refs_capacity *= 2;
+                    extern_refs = realloc(extern_refs,
+                        sizeof(ExternRef) * extern_refs_capacity);
+                }
+                extern_refs[extern_refs_size++] = (ExternRef) {
+                    .id = ex->extern_id,
+                    .ip = ip + chunk_i,
+                };
+                break;
+            }
+            assert(found);
+        }
+
         ip += size;
     }
 
@@ -190,6 +244,30 @@ int main(int argc, char** argv)
 
     evaluator.second_pass = true;
 
+    // Symbol listing on stdout. The ids are size_t, so they are printed with
+    // the `z` length modifier.
+    printf("globals:\n");
+    printf("symbol          address\n");
+    printf("-----------------------\n");
+    for (size_t i = 0; i < global_symbols_size; ++i) {
+        GlobalSym* sym = &global_symbols[i];
+        printf("%-15s %d\n", sym->ident, sym->ip);
+    }
+    printf("\nexterns:\n");
+    printf("id  symbol\n");
+    printf("--------------------\n");
+    for (size_t i = 0; i < extern_symbols_size; ++i) {
+        ExternSym* sym = &extern_symbols[i];
+        printf("%-3zu %s\n", sym->id, sym->ident);
+    }
+    printf("\nextern uses:\n");
+    printf("id  address\n");
+    printf("-----------\n");
+    for (size_t i = 0; i < extern_refs_size; ++i) {
+        ExternRef* ref = &extern_refs[i];
+        printf("%3zu %04x\n", ref->id, ref->ip);
+    }
+
     FILE* output_fp = fopen(args.output_file, "wb");
     if (!output_fp) {
         REPORTF_ERROR("could not open output file '%s': %s",
@@ -209,7 +285,7 @@ int main(int argc, char** argv)
         }
 
         if (errors_occured)
-            continue;
+            break;
 
         size_t bytes_written = fwrite(chunk, sizeof(uint16_t), size, output_fp);
         total_bytes_written += bytes_written;
@@ -233,15 +309,18 @@ int main(int argc, char** argv)
 leave_free_chunk:
     free(chunk);
     // leave_free_lines:
-    for (size_t i = 0; i < lines_size; ++i) {
+    for (size_t i = 0; i < lines_size; ++i)
         pline_free(lines[i]);
-    }
-    for (size_t i = 0; i < global_symbols_size; ++i) {
+    for (size_t i = 0; i < global_symbols_size; ++i)
         free(global_symbols[i].ident);
-    }
     free(global_symbols);
+    for (size_t i = 0; i < extern_symbols_size; ++i)
+        free(extern_symbols[i].ident);
+    free(extern_symbols);
+    free(extern_refs);
     free(lines);
     free(input_text);
+    operand_evaluator_destroy(&evaluator);
     ident_resolver_destroy(&resolver);
     return res;
 }
@@ -328,17 +407,7 @@ static inline int include_file(IdentResolver* resolver,
                         pstmt_free(stmt);
                         continue;
                     }
-                    REPORTF_ERROR(
-                        "redefinition of identifier '%s'", stmt->ident);
-                    reporter_print_loc(&rep, stmt->loc);
-                    const char* filename = rep.filename;
-                    if (existing->ty == IdentResolTy_Const) {
-                        filename = existing->src_filename;
-                    }
-                    REPORTF_INFO(
-                        "previous definition of '%s' here", existing->ident);
-                    reporter_print_loc(&rep, existing->loc);
-                    rep.filename = filename;
+                    report_redefinition(existing, stmt, &rep);
                     errors_occured = true;
                     pstmt_free(stmt);
                     continue;
@@ -379,6 +448,25 @@ static inline int include_file(IdentResolver* resolver,
     return errors_occured;
 }
 
+// Reports that `stmt` redefines the identifier already resolved to `existing`
+// and points at the previous definition.
+//
+// A constant remembers the file that defined it, so that file name is swapped
+// into `rep` while the previous location is printed and restored afterwards.
+static inline void report_redefinition(
+    const IdentResol* existing, PStmt* stmt, Reporter* rep)
+{
+    REPORTF_ERROR("redefinition of identifier '%s'", stmt->ident);
+    reporter_print_loc(rep, stmt->loc);
+    const char* current_filename = rep->filename;
+    if (existing->ty == IdentResolTy_Const) {
+        rep->filename = existing->src_filename;
+    }
+    REPORTF_INFO("previous definition of '%s' here", existing->ident);
+    reporter_print_loc(rep, existing->loc);
+    rep->filename = current_filename;
+}
+
 static inline char* read_text_file(const char* filename)
 {
     FILE* fp = fopen(filename, "r");
diff --git a/asm/resolve.c b/asm/resolve.c
index ee65c0f..287aa28 100644
--- a/asm/resolve.c
+++ b/asm/resolve.c
@@ -15,6 +15,9 @@ void ident_resol_destroy(IdentResol* resol)
             free(resol->ident);
             free(resol->src_filename);
             break;
+        case IdentResolTy_Extern:
+            free(resol->ident);
+            break;
     }
 }
 
@@ -97,6 +100,21 @@ void ident_resolver_define_const(IdentResolver* resolver,
     };
 }
 
+// Defines `ident` as an external symbol numbered `extern_id`, declared at
+// `loc`. Takes ownership of `ident`; ident_resol_destroy() frees it.
+void ident_resolver_define_extern(
+    IdentResolver* resolver, char* ident, Loc loc, size_t extern_id)
+{
+    const IdentResol definition = {
+        .ty = IdentResolTy_Extern,
+        .ident = ident,
+        .loc = loc,
+        .extern_id = extern_id,
+    };
+    const size_t slot = ident_resolver_first_empty(resolver);
+    resolver->resols[slot] = definition;
+}
+
 const IdentResol* ident_resolver_resolve(
     const IdentResolver* resolver, const char* ident)
 {
diff --git a/asm/resolve.h b/asm/resolve.h
index ef40472..27f4710 100644
--- a/asm/resolve.h
+++ b/asm/resolve.h
@@ -8,6 +8,7 @@ typedef enum {
     IdentResolTy_Label,
     IdentResolTy_SubLabel,
     IdentResolTy_Const,
+    IdentResolTy_Extern,
 } IdentResolTy;
 
 typedef struct IdentResol IdentResol;
@@ -22,6 +23,7 @@ struct IdentResol {
             uint16_t value;
             char* src_filename;
         };
+        size_t extern_id;
     };
 };
 
@@ -46,5 +48,7 @@ void ident_resolver_define_const(IdentResolver* resolver,
     Loc loc,
     uint16_t value,
     char* src_filename);
+void ident_resolver_define_extern(
+    IdentResolver* resolver, char* ident, Loc loc, size_t extern_id);
 const IdentResol* ident_resolver_resolve(
     const IdentResolver* resolver, const char* ident);
diff --git a/kern/main.asm b/kern/main.asm
index b13d736..21a9a0a 100644
--- a/kern/main.asm
+++ b/kern/main.asm
@@ -20,6 +20,7 @@ start:
     int Int_DiskRead
 
     jmp other
+    jmp other
 
 main_loop:
     hlt