commit 181f237f4d4306b00eb10db2998c9147a9e4d120
Author: sfja
Date:   Wed Dec 10 22:46:32 2025 +0100

    init

diff --git a/deno.jsonc b/deno.jsonc
new file mode 100644
index 0000000..2b9e474
--- /dev/null
+++ b/deno.jsonc
@@ -0,0 +1,6 @@
+{
+    "fmt": {
+        "indentWidth": 4
+    }
+}
+
diff --git a/deno.lock b/deno.lock
new file mode 100644
index 0000000..ee29d8e
--- /dev/null
+++ b/deno.lock
@@ -0,0 +1,15 @@
+{
+  "version": "5",
+  "specifiers": {
+    "jsr:@std/cli@*": "1.0.24",
+    "jsr:@std/yaml@*": "1.0.10"
+  },
+  "jsr": {
+    "@std/cli@1.0.24": {
+      "integrity": "b655a5beb26aa94f98add6bc8889f5fb9bc3ee2cc3fc954e151201f4c4200a5e"
+    },
+    "@std/yaml@1.0.10": {
+      "integrity": "245706ea3511cc50c8c6d00339c23ea2ffa27bd2c7ea5445338f8feff31fa58e"
+    }
+  }
+}
diff --git a/example.lang4 b/example.lang4
new file mode 100644
index 0000000..a656dd0
--- /dev/null
+++ b/example.lang4
@@ -0,0 +1,20 @@
+
+fn main() -> int {
+    let v: int = 123;
+
+    let ch = 'c';
+    let s = "hello\ world";
+
+    print_int(v);
+    print_str(v);
+}
+
+
+fn print_int(v: int) {
+}
+
+fn print_str(v: str) {
+}
+
+// vim: syntax=rust
+
diff --git a/src/ast.ts b/src/ast.ts
new file mode 100644
index 0000000..79e5ac6
--- /dev/null
+++ b/src/ast.ts
@@ -0,0 +1,277 @@
+export class File {
+    constructor(
+        public readonly id: number,
+        public readonly stmts: Stmt[],
+    ) {}
+
+    visit(v: Visitor) {
+        v.visitFile?.(this);
+        this.stmts.forEach((stmt) => stmt.visit(v));
+    }
+}
+
+export class Block {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly stmts: Stmt[],
+        public readonly expr?: Expr,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitBlock?.(this) === VisitorBreak) {
+            return;
+        }
+        this.stmts.forEach((stmt) => stmt.visit(v));
+        this.expr?.visit(v);
+    }
+}
+
+export class Stmt {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly kind: StmtKind,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitStmt?.(this) === VisitorBreak) {
+            return;
+        }
+        const k = this.kind;
+        switch (k.tag) {
+            case "error":
+                break;
+            case "fn":
+                k.params.forEach((param) => param.visit(v));
+                k.retTy?.visit(v);
+                k.body.visit(v);
+                break;
+            case "let":
+                k.param.visit(v);
+                k.init.visit(v);
+                break;
+            case "assign":
+                k.place.visit(v);
+                k.expr.visit(v);
+                break;
+            case "expr":
+                k.expr.visit(v);
+                break;
+        }
+    }
+}
+
+export type StmtKind =
+    | { tag: "error" }
+    | { tag: "fn"; ident: string; params: Param[]; retTy?: Ty; body: Block }
+    | { tag: "let"; param: Param; init: Expr }
+    | { tag: "assign"; place: Expr; expr: Expr }
+    | { tag: "expr"; expr: Expr };
+
+export class Expr {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly kind: ExprKind,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitExpr?.(this) === VisitorBreak) {
+            return;
+        }
+        const k = this.kind;
+        switch (k.tag) {
+            case "error":
+                break;
+            case "ident":
+                break;
+            case "int":
+                break;
+            case "char":
+                break;
+            case "str":
+                break;
+            case "call":
+                k.expr.visit(v);
+                k.args.forEach((expr) => expr.visit(v));
+                break;
+        }
+    }
+}
+
+export type ExprKind =
+    | { tag: "error" }
+    | { tag: "ident"; ident: string }
+    | { tag: "int"; value: string }
+    | { tag: "char"; value: string }
+    | { tag: "str"; value: string }
+    | { tag: "call"; expr: Expr; args: Expr[] };
+
+export class Param {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly pat: Pat,
+        public readonly ty?: Ty,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitParam?.(this) === VisitorBreak) {
+            return;
+        }
+        this.pat.visit(v);
+        this.ty?.visit(v);
+    }
+}
+
+export class Pat {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly kind: PatKind,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitPat?.(this) === VisitorBreak) {
+            return;
+        }
+        const k = this.kind;
+        switch (k.tag) {
+            case "error":
+                break;
+            case "ident":
+                break;
+        }
+    }
+}
+
+export type PatKind =
+    | { tag: "error" }
+    | { tag: "ident"; ident: string };
+
+export class Ty {
+    constructor(
+        public readonly id: number,
+        public readonly line: number,
+        public readonly kind: TyKind,
+    ) {}
+
+    visit(v: Visitor) {
+        if (v.visitTy?.(this) === VisitorBreak) {
+            return;
+        }
+        const k = this.kind;
+        switch (k.tag) {
+            case "error":
+                break;
+            case "ident":
+                break;
+        }
+    }
+}
+
+export type TyKind =
+    | { tag: "error" }
+    | { tag: "ident"; ident: string };
+
+export const VisitorBreak = Symbol();
+export type VisitorBreak = typeof VisitorBreak;
+
+export interface Visitor {
+    visitFile?(file: File): void | VisitorBreak;
+    visitBlock?(block: Block): void | VisitorBreak;
+    visitStmt?(stmt: Stmt): void | VisitorBreak;
+    visitExpr?(expr: Expr): void | VisitorBreak;
+    visitParam?(param: Param): void | VisitorBreak;
+    visitPat?(pat: Pat): void | VisitorBreak;
+    visitTy?(ty: Ty): void | VisitorBreak;
+}
+
+/**
+ * Factory for AST nodes; every node gets the next value of a running `id`.
+ *
+ * For `stmt`/`expr`/`pat`/`ty`, the `Tag` type parameter picks one variant of
+ * the kind union and `kind` carries that variant's fields without `tag`.
+ */
+export class AstBuilder {
+    private id = 0;
+
+    file(
+        stmts: Stmt[],
+    ): File {
+        return new File(
+            this.id++,
+            stmts,
+        );
+    }
+
+    block(
+        line: number,
+        stmts: Stmt[],
+        expr?: Expr,
+    ): Block {
+        return new Block(
+            this.id++,
+            line,
+            stmts,
+            expr,
+        );
+    }
+
+    stmt<Tag extends StmtKind["tag"]>(
+        line: number,
+        tag: Tag,
+        kind: Omit<Extract<StmtKind, { tag: Tag }>, "tag">,
+    ): Stmt {
+        return new Stmt(
+            this.id++,
+            line,
+            { tag, ...kind } as StmtKind,
+        );
+    }
+
+    expr<Tag extends ExprKind["tag"]>(
+        line: number,
+        tag: Tag,
+        kind: Omit<Extract<ExprKind, { tag: Tag }>, "tag">,
+    ): Expr {
+        return new Expr(
+            this.id++,
+            line,
+            { tag, ...kind } as ExprKind,
+        );
+    }
+
+    param(
+        line: number,
+        pat: Pat,
+        ty?: Ty,
+    ): Param {
+        return new Param(this.id++, line, pat, ty);
+    }
+
+    pat<Tag extends PatKind["tag"]>(
+        line: number,
+        tag: Tag,
+        kind: Omit<Extract<PatKind, { tag: Tag }>, "tag">,
+    ): Pat {
+        return new Pat(
+            this.id++,
+            line,
+            { tag, ...kind } as PatKind,
+        );
+    }
+
+    ty<Tag extends TyKind["tag"]>(
+        line: number,
+        tag: Tag,
+        kind: Omit<Extract<TyKind, { tag: Tag }>, "tag">,
+    ): Ty {
+        return new Ty(
+            this.id++,
+            line,
+            { tag, ...kind } as TyKind,
+        );
+    }
+}
diff --git a/src/main.ts b/src/main.ts
new file mode 100644
index 0000000..3c4e299
--- /dev/null
+++ b/src/main.ts
@@ -0,0 +1,22 @@
+import * as yaml from "jsr:@std/yaml";
+import { tokenize } from "./tok.ts";
+import { Parser } from "./parse.ts";
+import { Resolver } from "./resolve.ts";
+
+async function main() {
+    const text = await Deno.readTextFile(Deno.args[0]);
+    const toks = tokenize(text);
+    // console.log({ toks });
+
+    const parser = new Parser(toks);
+    const file = parser.parseFile();
+
+    // console.log(yaml.stringify({ file }, { skipInvalid: true, indent: 2 }));
+    // console.log(JSON.stringify({ file }, null, 2));
+
+    file.visit(new Resolver());
+
+    // console.log(cx);
+}
+
+await main();
diff --git a/src/parse.ts b/src/parse.ts
new file mode 100644
index 0000000..3a125e2
--- /dev/null
+++ b/src/parse.ts
@@ -0,0 +1,265 @@
+import { AstBuilder, Block, Expr, File, Param, Pat, Stmt, Ty } from "./ast.ts";
+import { Tok } from "./tok.ts";
+
+const t = new AstBuilder();
+
+/**
+ * Recursive-descent parser over the token list produced by `tokenize`.
+ *
+ * Syntax errors are reported through `console.error` and yield `"error"`
+ * nodes instead of throwing.
+ */
+export class Parser {
+    private i = 0;
+    private eaten?: Tok;
+
+    constructor(
+        private toks: Tok[],
+    ) {}
+
+    parseFile(): File {
+        const stmts: Stmt[] = [];
+        while (!this.done) {
+            stmts.push(this.parseItem());
+        }
+        return t.file(stmts);
+    }
+
+    parseItem(): Stmt {
+        const line = this.line();
+        if (this.test("fn")) {
+            return this.parseFn();
+        } else if (this.test("let")) {
+            // Same shape as a block-level `let`: statement, then `;`.
+            const stmt = this.parseLet();
+            this.expect(";");
+            return stmt;
+        } else {
+            this.expect("item");
+            this.step();
+            return t.stmt(line, "error", {});
+        }
+    }
+
+    parseBlock(): Block {
+        const line = this.line();
+        this.step();
+
+        const stmts: Stmt[] = [];
+        let expr: Expr | undefined = undefined;
+
+        while (!this.done && !this.test("}")) {
+            const line = this.line();
+            if (this.test("fn")) {
+                stmts.push(this.parseFn());
+            } else if (this.test("let")) {
+                stmts.push(this.parseLet());
+                this.expect(";");
+            } else {
+                const lhs = this.parseExpr();
+                if (this.eat("=")) {
+                    const rhs = this.parseExpr();
+                    this.expect(";");
+                    stmts.push(
+                        t.stmt(line, "assign", { place: lhs, expr: rhs }),
+                    );
+                } else if (this.eat(";")) {
+                    stmts.push(t.stmt(line, "expr", { expr: lhs }));
+                } else if (this.test("}")) {
+                    expr = lhs;
+                    break;
+                } else {
+                    this.expect(";");
+                }
+            }
+        }
+
+        this.expect("}");
+        return t.block(line, stmts, expr);
+    }
+
+    parseFn(): Stmt {
+        const line = this.line();
+        this.step();
+        if (!this.expect("ident")) {
+            return t.stmt(line, "error", {});
+        }
+        const ident = this.eaten!.value!;
+        const params: Param[] = [];
+        if (!this.expect("(")) {
+            return t.stmt(line, "error", {});
+        }
+        if (!this.done && !this.test(")")) {
+            params.push(this.parseParam());
+            while (this.eat(",")) {
+                if (this.done || this.test(")")) {
+                    break;
+                }
+                params.push(this.parseParam());
+            }
+        }
+        if (!this.expect(")")) {
+            return t.stmt(line, "error", {});
+        }
+        let retTy: Ty | undefined = undefined;
+        if (this.eat("->")) {
+            retTy = this.parseTy();
+        }
+        if (!this.test("{")) {
+            this.expect("{");
+            return t.stmt(line, "error", {});
+        }
+        const body = this.parseBlock();
+        return t.stmt(line, "fn", { ident, params, retTy, body });
+    }
+
+    parseLet(): Stmt {
+        const line = this.line();
+        this.step();
+        const param = this.parseParam();
+        if (!this.expect("=")) {
+            return t.stmt(line, "error", {});
+        }
+        const init = this.parseExpr();
+        return t.stmt(line, "let", { param, init });
+    }
+
+    parseExpr(): Expr {
+        return this.parsePostfix();
+    }
+
+    parsePostfix(): Expr {
+        let expr = this.parseOp();
+        while (true) {
+            const line = this.line();
+            if (this.eat("(")) {
+                const args: Expr[] = [];
+                if (!this.done && !this.test(")")) {
+                    args.push(this.parseExpr());
+                    while (this.eat(",")) {
+                        if (this.done || this.test(")")) {
+                            break;
+                        }
+                        args.push(this.parseExpr());
+                    }
+                }
+                if (!this.expect(")")) {
+                    return t.expr(line, "error", {});
+                }
+                expr = t.expr(line, "call", { expr, args });
+            } else {
+                break;
+            }
+        }
+        return expr;
+    }
+
+    parseOp(): Expr {
+        const line = this.line();
+        if (this.eat("ident")) {
+            const ident = this.eaten!.value!;
+            return t.expr(line, "ident", { ident });
+        } else if (this.eat("int")) {
+            const value = this.eaten!.value!;
+            return t.expr(line, "int", { value });
+        } else if (this.eat("char")) {
+            const value = this.eaten!.value!;
+            return t.expr(line, "char", { value });
+        } else if (this.eat("str")) {
+            const value = this.eaten!.value!;
+            return t.expr(line, "str", { value });
+        } else {
+            this.expect("expr");
+            this.step();
+            return t.expr(line, "error", {});
+        }
+    }
+
+    parseParam(): Param {
+        const line = this.line();
+        const pat = this.parsePat();
+        let ty: Ty | undefined = undefined;
+        if (this.eat(":")) {
+            ty = this.parseTy();
+        }
+        return t.param(line, pat, ty);
+    }
+
+    parsePat(): Pat {
+        const line = this.line();
+        if (this.eat("ident")) {
+            const ident = this.eaten!.value!;
+            return t.pat(line, "ident", { ident });
+        } else {
+            this.expect("pat");
+            this.step();
+            return t.pat(line, "error", {});
+        }
+    }
+
+    parseTy(): Ty {
+        const line = this.line();
+        if (this.eat("ident")) {
+            const ident = this.eaten!.value!;
+            return t.ty(line, "ident", { ident });
+        } else {
+            this.expect("ty");
+            this.step();
+            return t.ty(line, "error", {});
+        }
+    }
+
+    private expect(type: string): boolean {
+        const line = this.line();
+        if (!this.eat(type)) {
+            if (this.done) {
+                this.error(line, `expected '${type}', got 'eof'`);
+            } else {
+                this.error(line, `expected '${type}', got '${this.tok.type}'`);
+            }
+            return false;
+        }
+        return true;
+    }
+
+    private eat(type: string): boolean {
+        if (this.test(type)) {
+            this.eaten = this.tok;
+            this.step();
+            return true;
+        }
+        return false;
+    }
+
+    private test(type: string): boolean {
+        return !this.done && this.tok.type === type;
+    }
+
+    private step() {
+        this.i += 1;
+    }
+
+    private line(): number {
+        // At EOF there is no current token; fall back to the last one.
+        const tok: Tok | undefined =
+            this.toks[Math.min(this.i, this.toks.length - 1)];
+        return tok?.line ?? 1;
+    }
+
+    private get tok(): Tok {
+        return this.toks[this.i];
+    }
+
+    private get done(): boolean {
+        return this.i >= this.toks.length;
+    }
+
+    private error(line: number, message: string) {
+        console.error(
+            `%cerror%c: ${message}\n %c--> line ${line}%c`,
+            "font-weight: bold; color: red",
+            "font-weight: bold; color: white",
+            "color: cyan",
+            "",
+        );
+    }
+}
diff --git a/src/resolve.ts b/src/resolve.ts
new file mode 100644
index 0000000..621858c
--- /dev/null
+++ b/src/resolve.ts
@@ -0,0 +1,83 @@
+import { Pat, Stmt, Visitor, VisitorBreak } from "./ast.ts";
+
+/** Definition site of an identifier: the declaring statement (and pattern). */
+export class Def {
+    constructor(
+        public readonly kind: DefKind,
+    ) {}
+}
+
+export type DefKind =
+    | { tag: "fn"; stmt: Stmt }
+    | { tag: "param"; stmt: Stmt; pat: Pat }
+    | { tag: "let"; stmt: Stmt; pat: Pat };
+
+export class Rib {
+    private syms = new Map<string, Def>();
+
+    constructor(
+        private kind: RibKind,
+    ) {}
+
+    define(ident: string, def: Def) {
+        this.syms.set(ident, def);
+    }
+}
+
+export type RibKind =
+    | { tag: "root" }
+    | { tag: "fn"; parent: Rib }
+    | { tag: "block"; parent: Rib }
+    | { tag: "let"; parent: Rib };
+
+/** Visitor that records `fn` and `let` definitions into a chain of ribs. */
+export class Resolver implements Visitor {
+    private rib = new Rib({ tag: "root" });
+    private ribStateStack: Rib[] = [];
+
+    pushRib<Tag extends RibKind["tag"]>(
+        tag: Tag,
+        kind: Omit<Extract<RibKind, { tag: Tag }>, "tag" | "parent">,
+    ) {
+        this.rib = new Rib({ tag, parent: this.rib, ...kind } as RibKind);
+    }
+
+    saveRibState() {
+        this.ribStateStack.push(this.rib);
+    }
+
+    restoreRibState() {
+        this.rib = this.ribStateStack.pop()!;
+    }
+
+    define(ident: string, def: Def) {
+        this.rib.define(ident, def);
+    }
+
+    visitStmt(stmt: Stmt): void | VisitorBreak {
+        const k = stmt.kind;
+        if (k.tag === "fn") {
+            this.define(k.ident, new Def({ tag: "fn", stmt }));
+            this.saveRibState();
+            this.pushRib("fn", {});
+            k.params.forEach((param) => param.visit(this));
+            k.retTy?.visit(this);
+            k.body.visit(this);
+            this.restoreRibState();
+            return VisitorBreak;
+        } else if (k.tag === "let") {
+            this.pushRib("let", {});
+            k.param.pat.visit({
+                visitPat: (pat: Pat) => {
+                    const k = pat.kind;
+                    if (k.tag === "ident") {
+                        this.define(
+                            k.ident,
+                            new Def({ tag: "let", stmt, pat }),
+                        );
+                    }
+                },
+            });
+        }
+    }
+}
diff --git a/src/tok.ts b/src/tok.ts
new file mode 100644
index 0000000..3baeccf
--- /dev/null
+++ b/src/tok.ts
@@ -0,0 +1,83 @@
+export type Tok = {
+    type: string;
+    line: number;
+    value?: string;
+};
+
+const keywords = new Set([
+    "fn",
+    "let",
+    "true",
+    "false",
+]);
+
+type OpTree = Map<string, OpTree | null>;
+const opTreeRoot: OpTree = new Map(
+    Object.entries({
+        "-": new Map(Object.entries({
+            ">": null,
+        })),
+    }),
+);
+
+/**
+ * Splits source text into tokens and tags each with its 1-based line.
+ *
+ * Comments are removed first. Anything that is not a word character, quote,
+ * backslash or blank is padded with spaces so the split isolates it, and
+ * `\ ` keeps a literal space inside a token.
+ */
+export function tokenize(text: string): Tok[] {
+    return text
+        .replace(/\/\/[^\n]*/g, "")
+        .replace(/\/\*.*?\*\//gs, "")
+        .replace(/([^a-zA-Z0-9_'"\\ \t\r])/g, " $1 ")
+        .split(/(?<!\\)[ \t\r]/)
+        .filter((tok) => tok !== "")
+        .map((tok) => tok.replace(/\\ /g, " "))
+        .reduce<[string[], OpTree]>(([toks, opTree], tok) => {
+            if (toks.length === 0) {
+                toks.push(tok);
+                return [toks, opTree];
+            }
+            const last = toks.at(-1)!;
+            if (!opTree.has(last)) {
+                toks.push(tok);
+                return [toks, opTreeRoot];
+            }
+            if (opTree.get(last) === null) {
+                toks.push(tok);
+                return [toks, opTreeRoot];
+            } else if (opTree.get(last)!.has(tok)) {
+                toks[toks.length - 1] += tok;
+                return [toks, opTree.get(last)!];
+            } else {
+                toks.push(tok);
+                return [toks, opTreeRoot];
+            }
+        }, [[], opTreeRoot])[0]
+        .reduce<[Tok[], number]>(([toks, line], type) => {
+            if (type === "\n") {
+                return [toks, line + 1];
+            } else {
+                toks.push({ type, line });
+                return [toks, line];
+            }
+        }, [[], 1])[0]
+        .map((tok) => {
+            if (
+                /^[a-zA-Z_][a-zA-Z0-9_]*$/.test(tok.type) &&
+                !keywords.has(tok.type)
+            ) {
+                return { type: "ident", line: tok.line, value: tok.type };
+            } else if (/^[0-9_]+$/.test(tok.type)) {
+                return { type: "int", line: tok.line, value: tok.type };
+            } else if (/^'.*?'$/.test(tok.type)) {
+                return { type: "char", line: tok.line, value: tok.type };
+            } else if (/^".*?"$/.test(tok.type)) {
+                return { type: "str", line: tok.line, value: tok.type };
+            } else {
+                return tok;
+            }
+        });
+}