From b805d9b48f7b05a19240f9fabfbca3652663af8c Mon Sep 17 00:00:00 2001
From: sfja
Date: Tue, 18 Mar 2025 02:10:45 +0100
Subject: [PATCH] init

---
Review notes (free-form text in this position is ignored by `git am`):

* Line breaks and indentation were restored; the received copy had them
  collapsed. Every hunk was re-counted against its header.
* Generic type arguments that were missing have been restored (`Map`, `Set`
  in main.ts, `Product<T>` in defs.structemit). `MonoUnit.generics` could not
  be determined from the code and is flagged in place.
* parser.pegjs: the "generic" and "int" actions now call `location()` instead
  of passing the function itself; generic arguments accept "," without a
  mandatory trailing space, like `Fields` and `Generics` already do.
* main.ts: `Reporter.info` printed the label "error"; `mapUnitIds` keyed units
  by array index although all lookups use unit ids; 13 one-line doc comments
  were added (428 -> 441 lines).
* ast.ts declares array `length` as `number | null` while the grammar yields a
  `Ty` there. Left untouched; needs a decision by the author.
* The blob ids on the `index` lines predate these edits.

 .gitignore         |   2 +
 ast.ts             |  36 ++++
 defs.structemit    |  15 ++
 deno.jsonc         |   5 +
 deno.lock          |  37 ++++
 generate_parser.sh |   6 +
 main.ts            | 441 +++++++++++++++++++++++++++++++++++++++++++++
 parser.config.js   |  12 ++
 parser.pegjs       |  60 +++++++
 9 files changed, 614 insertions(+)
 create mode 100644 .gitignore
 create mode 100644 ast.ts
 create mode 100644 defs.structemit
 create mode 100644 deno.jsonc
 create mode 100644 deno.lock
 create mode 100755 generate_parser.sh
 create mode 100644 main.ts
 create mode 100644 parser.config.js
 create mode 100644 parser.pegjs

diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..de00540
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,2 @@
+parser.out.*
+
diff --git a/ast.ts b/ast.ts
new file mode 100644
index 0000000..be35794
--- /dev/null
+++ b/ast.ts
@@ -0,0 +1,36 @@
+export type Struct = {
+    id: number;
+    ident: string;
+    loc: Loc;
+    generics: Generics | null;
+    fields: Field[];
+};
+
+export type Generics = { params: string[] };
+
+export type Field = {
+    ident: string;
+    loc: Loc;
+    ty: Ty;
+};
+
+export type Ty = {
+    id: number;
+    loc: Loc;
+    kind: TyKind;
+};
+
+export type TyKind =
+    | { tag: "array"; length: number | null; ty: Ty }
+    | { tag: "generic"; ident: string; args: Ty[] }
+    | { tag: "struct_literal"; fields: Field[] }
+    | { tag: "ident"; ident: string }
+    | { tag: "int"; value: number };
+
+export type Loc = {
+    source: string;
+    start: Pos;
+    end: Pos;
+};
+
+export type Pos = { offset: number; line: number; column: number };
diff --git a/defs.structemit b/defs.structemit
new file mode 100644
index 0000000..ed409cd
--- /dev/null
+++ b/defs.structemit
@@ -0,0 +1,15 @@
+
+struct Product<T> {
+    product_id: int,
+    name: str,
+    price_dkk_cent: int,
+    amount: T,
+}
+
+struct Receipt {
+    receipt_id: int,
+    timestamp: str,
+    products: Product[],
+}
+
+// vim: syntax=rust commentstring=//\ %s
diff --git a/deno.jsonc b/deno.jsonc
new file mode 100644
index 0000000..3fecf5f
--- /dev/null
+++ b/deno.jsonc
@@ -0,0 +1,5 @@
+{
+    "fmt": {
+        "indentWidth": 4
+    }
+}
diff --git a/deno.lock b/deno.lock
new file mode 100644
index 0000000..bc89f33
--- /dev/null
+++ b/deno.lock
@@ -0,0 +1,37 @@
+{
+  "version": "4",
+  "specifiers": {
+    "jsr:@std/yaml@*": "1.0.5",
+    "npm:peggy@*": "4.2.0"
+  },
+  "jsr": {
+    "@std/yaml@1.0.5": {
+      "integrity": "71ba3d334305ee2149391931508b2c293a8490f94a337eef3a09cade1a2a2742"
+    }
+  },
+  "npm": {
+    "@peggyjs/from-mem@1.3.5": {
+      "integrity": "sha512-oRyzXE7nirAn+5yYjCdWQHg3EG2XXcYRoYNOK8Quqnmm+9FyK/2YWVunwudlYl++M3xY+gIAdf0vAYS+p0nKfQ==",
+      "dependencies": [
+        "semver"
+      ]
+    },
+    "commander@12.1.0": {
+      "integrity": "sha512-Vw8qHK3bZM9y/P10u3Vib8o/DdkvA2OtPtZvD871QKjy74Wj1WSKFILMPRPSdUSx5RFK1arlJzEtA4PkFgnbuA=="
+    },
+    "peggy@4.2.0": {
+      "integrity": "sha512-ZjzyJYY8NqW8JOZr2PbS/J0UH/hnfGALxSDsBUVQg5Y/I+ZaPuGeBJ7EclUX2RvWjhlsi4pnuL1C/K/3u+cDeg==",
+      "dependencies": [
+        "@peggyjs/from-mem",
+        "commander",
+        "source-map-generator"
+      ]
+    },
+    "semver@7.6.3": {
+      "integrity": "sha512-oVekP1cKtI+CTDvHWYFUcMtsK/00wmAEfyqKfNdARm8u1wNVhSgaX7A8d4UuIlUI5e84iEwOhs7ZPYRmzU9U6A=="
+    },
+    "source-map-generator@0.8.0": {
+      "integrity": "sha512-psgxdGMwl5MZM9S3FWee4EgsEaIjahYV5AzGnwUvPhWeITz/j6rKpysQHlQ4USdxvINlb8lKfWGIXwfkrgtqkA=="
+    }
+  }
+}
diff --git a/generate_parser.sh b/generate_parser.sh
new file mode 100755
index 0000000..61f57b5
--- /dev/null
+++ b/generate_parser.sh
@@ -0,0 +1,6 @@
+#!/bin/bash
+
+# npx peggy --dts structemit.pegjs --format es -m -o parser.out.js
+npx peggy -c parser.config.js
+
+
diff --git a/main.ts b/main.ts
new file mode 100644
index 0000000..287d225
--- /dev/null
+++ b/main.ts
@@ -0,0 +1,441 @@
+import * as parser from "./parser.out.js";
+import * as yaml from "jsr:@std/yaml";
+import * as ast from "./ast.ts";
+
+/** Parses the file named by the first CLI argument, then resolves and checks its structs. */
+async function main() {
+    const filename = Deno.args[0];
+    const text = await Deno.readTextFile(filename);
+
+    const ast = ((): ast.Struct[] | undefined => {
+        try {
+            return parser.parse(text, {
+                grammarSource: filename,
+            });
+        } catch (e) {
+            if (e instanceof parser.SyntaxError) {
+                console.error(
+                    e.format([{ source: filename, text }]),
+                );
+                return undefined;
+            }
+            throw e;
+        }
+    })();
+    if (!ast) {
+        return;
+    }
+    //console.log(yaml.stringify(ast));
+
+    const rep = new Reporter(text);
+
+    const re = new Resolver(ast, rep).resolve();
+    const units = new DependencyTracer(ast, re, rep).trace();
+    const unitMap = mapUnitIds(units);
+
+    new CircularDependencyFinder(unitMap, rep)
+        .findCircularDependencies();
+
+    const entries = entryPoints(units, rep);
+
+    if (rep.errorOccured) {
+        console.log("Errors occured. Stopping...");
+    }
+}
+
+/** Output item of Monomorphizer. NOTE(review): the type arguments of `generics` are a guess; confirm. */
+type MonoUnit = {
+    id: number;
+    unit: Unit;
+    generics?: Map<number, ast.Ty>;
+};
+
+/** Stub: monomorph() currently returns an empty list for any input. */
+class Monomorphizer {
+    public constructor(
+        private units: Unit[],
+    ) {}
+
+    public monomorph(): MonoUnit[] {
+        const monoUnits: MonoUnit[] = [];
+        for (const unit of this.units) {
+        }
+        return monoUnits;
+    }
+}
+
+/** Reports dependency cycles between units through the Reporter. */
+class CircularDependencyFinder {
+    private hasCirc = new Set<number>();
+
+    public constructor(
+        private units: Map<number, Unit>,
+        private rep: Reporter,
+    ) {}
+
+    public findCircularDependencies() {
+        for (const id of this.units.keys()) {
+            this.searchDependencies(id);
+        }
+    }
+
+    private searchDependencies(
+        id: number,
+        defined = new Set<number>(),
+    ) {
+        if (this.hasCirc.has(id)) {
+            return;
+        }
+        defined.add(id);
+        const unit = this.units.get(id)!;
+        for (const depId of unit.dependencies) {
+            const dep = this.units.get(depId)!;
+            if (defined.has(depId)) {
+                this.rep.error(
+                    `circular dependency between '${unit.struct.ident}' and '${dep.struct.ident}'`,
+                );
+                this.hasCirc.add(unit.id);
+                return;
+            }
+            this.searchDependencies(depId, new Set(defined));
+        }
+    }
+}
+
+/** Returns the ids of structs without dependencies; reports an error when there are none. */
+function entryPoints(units: Unit[], rep: Reporter): Set<number> {
+    const entries = units.filter((unit) => unit.dependencies.size === 0);
+    if (entries.length === 0) {
+        rep.error(
+            "no entry points in dependency graph (everything depends on something else)",
+        );
+    }
+    return new Set(entries.map((entry) => entry.struct.id));
+}
+
+/** Indexes units by their own id, the same id space `Unit.dependencies` refers to. */
+function mapUnitIds(units: Unit[]): Map<number, Unit> {
+    return new Map(units.map((v) => [v.id, v]));
+}
+
+/** A struct together with the ids of the structs its fields refer to. */
+type Unit = {
+    id: number;
+    struct: ast.Struct;
+    dependencies: Set<number>;
+};
+
+/** Builds one Unit per struct using the resolutions produced by Resolver. */
+class DependencyTracer {
+    public constructor(
+        private ast: ast.Struct[],
+        private re: Map<number, Resol>,
+        private rep: Reporter,
+    ) {}
+
+    public trace(): Unit[] {
+        const units: Unit[] = [];
+        for (const struct of this.ast) {
+            const dependencies = new StructTracer(struct, this.re).trace();
+            units.push({ id: struct.id, struct, dependencies });
+        }
+        return units;
+    }
+}
+
+/** Collects the ids of all structs referenced by the field types of one struct. */
+class StructTracer {
+    private dependencies = new Set<number>();
+
+    public constructor(
+        private struct: ast.Struct,
+        private re: Map<number, Resol>,
+    ) {}
+
+    public trace(): Set<number> {
+        for (const field of this.struct.fields) {
+            this.traceTy(field.ty);
+        }
+        return this.dependencies;
+    }
+
+    private traceTy(ty: ast.Ty) {
+        const k = ty.kind;
+        switch (k.tag) {
+            case "array":
+                this.traceTy(k.ty);
+                return;
+            case "generic": {
+                const re = this.re.get(ty.id)!;
+                if (re.tag !== "struct") {
+                    return;
+                }
+                this.dependencies.add(re.struct.id);
+                for (const ty of k.args) {
+                    this.traceTy(ty);
+                }
+                return;
+            }
+            case "struct_literal":
+                for (const field of k.fields) {
+                    this.traceTy(field.ty);
+                }
+                return;
+            case "ident": {
+                const re = this.re.get(ty.id)!;
+                if (re.tag !== "struct") {
+                    return;
+                }
+                this.dependencies.add(re.struct.id);
+                return;
+            }
+            case "int":
+                return;
+        }
+        const _: never = k;
+    }
+}
+
+/** What a type identifier resolved to. */
+type Resol =
+    | { tag: "error" }
+    | { tag: "undefined" }
+    | { tag: "struct"; struct: ast.Struct }
+    | { tag: "generic"; struct: ast.Struct; idx: number }
+    | { tag: "primitive" };
+
+/** One scope of identifier definitions. */
+type Rib = {
+    defs: Map<string, Resol>;
+};
+
+/** Resolves type identifiers (keyed by ast.Ty id) and reports undefined or duplicate names. */
+class Resolver {
+    private resols = new Map<number, Resol>();
+    private ribs: Rib[] = [{ defs: new Map() }];
+
+    public constructor(
+        private ast: ast.Struct[],
+        private rep: Reporter,
+    ) {}
+
+    public resolve(): Map<number, Resol> {
+        for (const struct of this.ast) {
+            const res = this.define(struct.ident, { tag: "struct", struct });
+            if (res === "already defined") {
+                this.reportAlreadyDefined(struct.ident, struct.loc);
+            }
+        }
+        for (const struct of this.ast) {
+            this.ribs.push({ defs: new Map() });
+            if (struct.generics) {
+                for (const [idx, ident] of struct.generics.params.entries()) {
+                    const res = this.define(ident, {
+                        tag: "generic",
+                        struct,
+                        idx,
+                    });
+                    if (res === "already defined") {
+                        this.reportAlreadyDefined(ident, struct.loc);
+                    }
+                }
+            }
+            const fields = new Map<string, ast.Field>();
+            for (const field of struct.fields) {
+                this.resolveTy(field.ty);
+                if (fields.has(field.ident)) {
+                    this.rep.error(
+                        `field '${field.ident}' already defined`,
+                        field.loc,
+                    );
+                    const otherField = fields.get(field.ident)!;
+                    this.rep.info(
+                        `original field '${otherField.ident}' defined here`,
+                        otherField.loc,
+                    );
+                    continue;
+                }
+                fields.set(field.ident, field);
+            }
+            this.ribs.pop();
+        }
+        return this.resols;
+    }
+
+    private resolveTy(ty: ast.Ty) {
+        const k = ty.kind;
+        switch (k.tag) {
+            case "array":
+                this.resolveTy(k.ty);
+                return;
+            case "generic": {
+                const re = this.resolveIdent(k.ident);
+                if (re.tag !== "struct") {
+                    this.rep.error(`identifier '${k.ident}' is not a struct`);
+                    this.resols.set(ty.id, { tag: "error" });
+                    return;
+                }
+                if (!re.struct.generics) {
+                    this.rep.error(
+                        `struct '${re.struct.ident}' does not accept generics`,
+                    );
+                    this.resols.set(ty.id, { tag: "error" });
+                    return;
+                }
+                if (re.struct.generics.params.length !== k.args.length) {
+                    this.rep.error(
+                        `incorrect amount of generic paramters, expected ${re.struct.generics.params.length}, got ${k.args.length}`,
+                    );
+                    this.resols.set(ty.id, { tag: "error" });
+                    return;
+                }
+                this.resols.set(ty.id, re);
+                for (const arg of k.args) {
+                    this.resolveTy(arg);
+                }
+                return;
+            }
+            case "struct_literal": {
+                const fields = new Map<string, ast.Field>();
+                for (const field of k.fields) {
+                    this.resolveTy(field.ty);
+                    if (fields.has(field.ident)) {
+                        this.rep.error(
+                            `field '${field.ident}' already defined`,
+                            field.loc,
+                        );
+                        const otherField = fields.get(field.ident)!;
+                        this.rep.info(
+                            `original field '${otherField.ident}' defined here`,
+                            otherField.loc,
+                        );
+                        continue;
+                    }
+                    fields.set(field.ident, field);
+                }
+                return;
+            }
+            case "ident": {
+                const re = this.resolveIdent(k.ident);
+                if (re.tag === "undefined") {
+                    this.rep.error(
+                        `identifier '${k.ident}' is not defined`,
+                        ty.loc,
+                    );
+                }
+                this.resols.set(ty.id, re);
+                return;
+            }
+            case "int":
+                return;
+        }
+        const _: never = k;
+    }
+
+    private reportAlreadyDefined(ident: string, loc: ast.Loc) {
+        const re = this.resolveIdent(ident);
+        switch (re.tag) {
+            case "error":
+            case "undefined":
+                throw new Error();
+            case "struct":
+                this.rep.error(`identifier '${ident}' already defined`, loc);
+                this.rep.info(
+                    `struct '${re.struct.ident}' defined here`,
+                    re.struct.loc,
+                );
+                break;
+            case "generic":
+                this.rep.error(`identifier '${ident}' already defined`, loc);
+                this.rep.info(
+                    `generic parameter '${
+                        re.struct.generics!.params[re.idx]
+                    }' defined here`,
+                    re.struct.loc,
+                );
+                break;
+            case "primitive":
+                this.rep.error(`cannot redefine primitive '${ident}'`, loc);
+                break;
+        }
+    }
+
+    private static primitives = ["bool", "int", "float", "str"];
+
+    private resolveIdent(ident: string): Resol {
+        if (Resolver.primitives.includes(ident)) {
+            return { tag: "primitive" };
+        }
+        for (const rib of this.ribs.toReversed()) {
+            if (rib.defs.has(ident)) {
+                return rib.defs.get(ident)!;
+            }
+        }
+        return { tag: "undefined" };
+    }
+
+    private define(ident: string, re: Resol): "ok" | "already defined" {
+        if (this.rib().defs.has(ident)) {
+            return "already defined";
+        }
+        this.rib().defs.set(ident, re);
+        return "ok";
+    }
+
+    private rib(): Rib {
+        return this.ribs.at(-1)!;
+    }
+}
+
+/** Prints diagnostics to stderr and remembers whether an error was reported. */
+class Reporter {
+    public errorOccured = false;
+
+    public constructor(
+        private text: string,
+    ) {}
+
+    public error(msg: string, loc?: ast.Loc) {
+        this.errorOccured = true;
+        console.error(
+            `%cerror%c: ${msg}%c`,
+            "color: red; font-weight: bold",
+            "font-weight: bold",
+            "",
+        );
+        if (loc) {
+            this.printLoc(loc);
+        }
+    }
+
+    public info(msg: string, loc?: ast.Loc) {
+        console.error(
+            `%cinfo%c: ${msg}%c`,
+            "color: cyan; font-weight: bold",
+            "font-weight: bold",
+            "",
+        );
+        if (loc) {
+            this.printLoc(loc);
+        }
+    }
+
+    private printLoc(loc: ast.Loc) {
+        const line = this.text.split("\n")[loc.start.line - 1];
+        const posPad = " ".repeat(loc.start.column - 1);
+        const lineNr = loc.start.line.toString().padStart(3, " ");
+        const lPad = " ".repeat(lineNr.length + 1);
+        const pos = `./${loc.source}:${loc.start.line}:${loc.start.column}`;
+        console.error(
+            `%c --> ${pos}\n${lPad}|\n${lineNr} |%c${line}%c\n${lPad}|${posPad}^%c`,
+            "color: gray",
+            "color: lightgray",
+            "color: gray",
+            "",
+        );
+    }
+}
+
+if (import.meta.main) {
+    main();
+}
diff --git a/parser.config.js b/parser.config.js
new file mode 100644
index 0000000..bd41be0
--- /dev/null
+++ b/parser.config.js
@@ -0,0 +1,12 @@
+// MJS
+export default {
+    input: "parser.pegjs",
+    output: "parser.out.js",
+    format: "es",
+    sourceMap: true,
+    dts: true,
+    returnTypes: {
+        Ident: "string",
+        Int: "number",
+    },
+};
diff --git a/parser.pegjs b/parser.pegjs
new file mode 100644
index 0000000..5ef2b19
--- /dev/null
+++ b/parser.pegjs
@@ -0,0 +1,60 @@
+{{
+    let defIds = 0;
+    let tyIds = 0;
+
+    const Ty = (kind, loc) => ({ id: tyIds++, loc, kind });
+}}
+
+
+Defs = _ defs:StructDef|.., _ | _ { return defs; }
+
+StructDef
+    = "struct" _ ident:Ident _ generics:Generics? _ "{" fields:Fields "}"
+    { return { id: defIds++, ident, loc: location(), generics, fields } }
+
+Fields = _ fields:FieldDef|.., _ "," _| _ ","? _ { return fields; }
+
+Generics = "<" _ params:Ident|.., _ "," _| _ ","? _ ">" { return { params }; }
+
+FieldDef
+    = ident:Ident _ ":" _ ty:Ty
+    { return { ident, loc: location(), ty }; }
+
+Ty = ty:Ty4 { return ty; }
+
+Ty4
+    = ty:Ty3 pairs:(_ "[" _ len:Ty? _ "]" { return { length: len, loc: location() }; })*
+    { return pairs
+        .reduce((inner, {length, loc: {end}}) =>
+            Ty({ tag: "array", length, ty: inner }, {...inner.loc, end}),
+            ty); }
+    / Ty3
+
+Ty3
+    = ident:Ident _ "<" _ args:Ty|.., _ "," _| _ ","? _ ">"
+    { return Ty({ tag: "generic", ident, args }, location()); }
+    / Ty2
+
+Ty2
+    = "{" fields:Fields "}"
+    { return Ty({ tag: "struct_literal", fields }, location()); }
+    / Ty1
+
+Ty1 "type"
+    = ident:Ident { return Ty({ tag: "ident", ident }, location()) }
+    / value:Int { return Ty({ tag: "int", value }, location()); }
+
+Ident "identifier"
+    = [a-zA-Z_][a-zA-Z0-9_]* { return text(); }
+
+Int "integer"
+    = ("0" / [1-9][0-9]*) { return parseInt(text()); }
+
+_ "whitespace"
+    = (WhiteSpaceChars / SingleLineComment)*
+
+WhiteSpaceChars = [ \t\n\r]
+SingleLineComment = "//" (!"\n" .)*
+
+// vim: syntax=typescript commentstring=//\ %s
+