245 lines
9.4 KiB
TypeScript
245 lines
9.4 KiB
TypeScript
// monomorphized function (ast-)graphs
|
|
|
|
import { Expr, Stmt } from "./ast.ts";
|
|
import { AstVisitor, visitExpr, VisitRes, visitStmts } from "./ast_visitor.ts";
|
|
import { VType } from "./vtype.ts";
|
|
|
|
/**
 * One function instance registered by the monomorphizer.
 */
export type MonomorphizedFn = {
    /** Monomorphization id; the key the instance is registered under. */
    mid: string;
    /** The statement the instance refers to. */
    stmt: Stmt;
    /** Generic arguments recorded for the instance, if any. */
    genericArgs?: Array<VType>;
};
|
|
|
|
/**
 * Entry point of this module: collects the `fn` statements of `ast`,
 * looks up the one named `main`, and returns every function instance the
 * monomorphizer registers when started from it.
 *
 * @param ast Statements of the program.
 * @returns The registered instances, in registration order.
 * @throws Error when no function named `main` is found.
 */
export function monomorphizeFunctionGraphs(ast: Stmt[]): MonomorphizedFn[] {
    const fnsById = new AllFnsCollector().collect(ast);
    const entryFn = findMain(fnsById);
    const instances = new Monomorphizer(fnsById).monomorphize(entryFn);
    return Array.from(instances.values());
}
|
|
|
|
/**
 * Help text printed when the program has no `main` function.
 *
 * Kept as explicit lines joined with "\n" so the output does not depend on
 * how this source file is indented (and no whitespace has to be stripped
 * from the text at runtime).
 */
const MISSING_MAIN_HELP = [
    "Hear me out. Monomorphization, meaning the process",
    "inwich generic functions are stamped out into seperate",
    "specialized functions is actually really hard, and I",
    "have a really hard time right now, figuring out, how",
    "to do it in a smart way. To really explain it, let's",
    "imagine you have a function, you defined as a<T>().",
    "For each call with seperate generics arguments given,",
    "such as a::<int>() and a::<string>(), a specialized",
    "function has to be 'stamped out', ie. created and put",
    "into the compilation with the rest of the program. Now",
    "to the reason as to why 'main' is needed. To do the",
    "monomorphization, we have to do it recursively. To",
    "explain this, imagine you have a generic function a<T>",
    "and inside the body of a<T>, you call another generic",
    "function such as b<T> with the same generic type. This",
    "means that the monomorphization process of b<T> depends",
    "on the monomorphization of a<T>. What this essentially",
    "means, is that the monomorphization process works on",
    "the program as a call graph, meaning a graph or tree",
    "structure where each represents a function call to",
    "either another function or a recursive call to the",
    "function itself. But a problem arises from doing it",
    "this way, which is that a call graph will need an",
    "entrypoint. The language, as it is currently, does",
    "not really require a 'main'-function. Or maybe it",
    "does, but that's beside the point. The point is that",
    "we need a main function, to be the entry point for",
    "the call graph. The monomorphization process then",
    "runs through the program from that entry point. This",
    "means that each function we call, will itself be",
    "monomorphized and added to the compilation. It also",
    "means that functions that are not called, will also",
    "not be added to the compilation. This essentially",
    "eliminates uncalled/dead functions. Is this",
    "particularly smart to do in such a high level part",
    "of the compilation process? I don't know. It's",
    "obvious that we can't just use every function as",
    "an entry point in the call graph, because we're",
    "actively added new functions. Additionally, with",
    "generic functions, we don't know, if they're the",
    "entry point, what generic arguments, they should",
    "be monomorphized with. We could do monomorphization",
    "the same way C++ does it, where all non-generic",
    "functions before monomorphization are treated as",
    "entry points in the call graph. But this has the",
    "drawback that generic and non-generic functions",
    "are treated differently, which has many underlying",
    "drawbacks, especially pertaining to the amount of",
    "work needed to handle both in all proceeding steps",
    "of the compiler. Anyways, I just wanted to yap and",
    "complain about the way generics and monomorphization",
    "has made the compiler 100x more complicated, and",
    "that I find it really hard to implement in a way,",
    "that is not either too simplistic or so complicated",
    "and advanced I'm too dumb to implement it. So if",
    "you would be so kind as to make it clear to the",
    "compiler, what function it should designate as",
    "the entry point to the call graph, it will use",
    "for monomorphization, that would be very kind of",
    "you. The way you do this, is by added or selecting",
    "one of your current functions and giving it the",
    "name of 'main'. This is spelled m-a-i-n. The word",
    "is synonemous with the words primary and principle.",
    "The name is meant to designate the entry point into",
    "the program, which is why the monomorphization",
    "process uses this specific function as the entry",
    "point into the call graph, it generates. So if you",
    "would be so kind as to do that, that would really",
    "make my day. In any case, keep hacking ferociously",
    "on whatever you're working on. I have monomorphizer",
    "to implement. See ya. -Your favorite compiler girl <3",
].join("\n");

/**
 * Returns the first `fn` statement in `fns` whose identifier is `main`.
 *
 * When there is none, a short error line and the long explanatory help text
 * are written to stderr before throwing.
 *
 * @param fns Function statements keyed by statement id.
 * @returns The statement of the `main` function.
 * @throws Error with message "cannot find function 'main'" if absent.
 */
function findMain(fns: Map<number, Stmt>): Stmt {
    // A plain loop avoids depending on iterator-helper support
    // (`Iterator.prototype.find`) in the runtime / lib target.
    for (const stmt of fns.values()) {
        if (stmt.kind.type === "fn" && stmt.kind.ident === "main") {
            return stmt;
        }
    }
    console.error("error: cannot find function 'main'");
    console.error(MISSING_MAIN_HELP);
    throw new Error("cannot find function 'main'");
}
|
|
|
|
/**
 * AST visitor that gathers `fn` statements into a map keyed by statement id.
 */
class AllFnsCollector implements AstVisitor {
    private fnsById = new Map<number, Stmt>();

    /**
     * Runs the visitor over `ast` and returns the collected statements.
     * The returned map is the collector's own map, not a copy.
     */
    public collect(ast: Stmt[]): Map<number, Stmt> {
        visitStmts(ast, this);
        return this.fnsById;
    }

    /** Visitor hook: records `stmt` under its id; throws if it is not a `fn`. */
    visitFnStmt(stmt: Stmt): VisitRes {
        const isFn = stmt.kind.type === "fn";
        if (!isFn) {
            throw new Error();
        }
        this.fnsById.set(stmt.id, stmt);
    }
}
|
|
|
|
/**
 * Walks the call graph starting at an entry function and registers one
 * `MonomorphizedFn` per distinct callee instance, keyed by its mid.
 *
 * Only callees are registered: the entry function passed to `monomorphize`
 * is traversed but not itself added to the result.
 * NOTE(review): confirm whether later stages expect `main` in the result.
 */
class Monomorphizer {
    private monomorphizedFns = new Map<string, MonomorphizedFn>();

    /** @param allFns Every `fn` statement of the program, keyed by statement id. */
    public constructor(private allFns: Map<number, Stmt>) {}

    /**
     * Traverses the calls reachable from `mainFn`.
     *
     * @returns The map of registered instances (the instance's own map).
     */
    public monomorphize(mainFn: Stmt): Map<string, MonomorphizedFn> {
        this.monomorphizeFn(mainFn);
        return this.monomorphizedFns;
    }

    /**
     * Registers every function called from the body of `fnStmt`, recursing
     * into each callee the first time its mid is seen.
     *
     * NOTE(review): generic arguments of a callee are taken as written at
     * the call site; arguments that mention the caller's own type
     * parameters are not substituted here — presumably still to be done.
     */
    private monomorphizeFn(fnStmt: Stmt): void {
        const calls = new FnBodyCallCollector().collect(fnStmt);
        for (const expr of calls) {
            if (expr.kind.type !== "call") {
                throw new Error("expected a call expression");
            }
            const vtype = expr.kind.subject.vtype;
            if (!vtype) {
                throw new Error("call subject has no resolved type");
            }

            // Resolve which function is called and with which generic args.
            let calleeId: number;
            let calleeGenericArgs: VType[] | undefined;
            if (vtype.type === "fn") {
                calleeId = vtype.fnStmtId;
            } else if (vtype.type === "generic_args") {
                if (vtype.subject.type !== "fn") {
                    throw new Error("generic arguments applied to a non-function");
                }
                calleeId = vtype.subject.fnStmtId;
                calleeGenericArgs = vtype.genericArgs;
            } else {
                throw new Error(`cannot monomorphize call on '${vtype.type}'`);
            }

            const callee = this.allFns.get(calleeId);
            if (callee === undefined) {
                throw new Error(`unknown function statement id ${calleeId}`);
            }
            if (callee.kind.type !== "fn") {
                throw new Error("callee is not a function statement");
            }

            const mid = fnCallMid(expr, callee);
            if (this.monomorphizedFns.has(mid)) {
                // Already registered; keep going with the remaining calls.
                continue;
            }
            // Register before recursing so (mutually) recursive functions
            // terminate on the `has(mid)` check above.
            this.monomorphizedFns.set(
                mid,
                calleeGenericArgs === undefined
                    ? { mid, stmt: callee }
                    : { mid, stmt: callee, genericArgs: calleeGenericArgs },
            );
            this.monomorphizeFn(callee);
        }
    }
}
|
|
|
|
/**
 * AST visitor that gathers the call expressions found while visiting the
 * body of a single `fn` statement.
 */
class FnBodyCallCollector implements AstVisitor {
    private found: Expr[] = [];

    /**
     * Visits the body of `stmt` and returns the collected call expressions.
     * Throws if `stmt` is not a `fn` statement.
     */
    public collect(stmt: Stmt): Expr[] {
        const { kind } = stmt;
        if (kind.type !== "fn") {
            throw new Error();
        }
        visitExpr(kind.body, this);
        return this.found;
    }

    /** Visitor hook: records `expr`; throws if it is not a call expression. */
    visitCallExpr(expr: Expr): VisitRes {
        const isCall = expr.kind.type === "call";
        if (!isCall) {
            throw new Error();
        }
        this.found.push(expr);
    }
}
|
|
|
|
/**
 * Computes the monomorphization id for the function `stmt` as invoked by
 * the call expression `expr`.
 *
 * A plain function call yields the id of `stmt` without generic arguments;
 * a call through explicit generic arguments includes those arguments.
 *
 * @param expr The call expression; its subject must have a resolved type.
 * @param stmt The `fn` statement being called.
 * @returns The monomorphization id.
 * @throws Error if `expr` is not a call, or its subject is not a function
 *         (optionally wrapped in generic arguments).
 */
export function fnCallMid(expr: Expr, stmt: Stmt): string {
    if (expr.kind.type !== "call") {
        throw new Error("expected a call expression");
    }
    const vtype = expr.kind.subject.vtype;
    if (!vtype) {
        throw new Error("call subject has no resolved type");
    }
    switch (vtype.type) {
        case "fn":
            return fnStmtMid(stmt);
        case "generic_args":
            if (vtype.subject.type !== "fn") {
                throw new Error("generic arguments applied to a non-function");
            }
            return fnStmtMid(stmt, vtype.genericArgs);
        default:
            throw new Error(`cannot compute mid for call on '${vtype.type}'`);
    }
}
|
|
|
|
/**
 * Builds the monomorphization id of a `fn` statement.
 *
 * Without generic arguments the id is `<ident>_<id>`, except that a function
 * named `main` is always just `main`. With generic arguments the id is
 * `<ident>_<id>_` followed by the arguments' parts joined with `_`.
 *
 * @param stmt The `fn` statement; throws if it is any other statement kind.
 * @param genericArgs Generic arguments of the instance, if any.
 */
export function fnStmtMid(stmt: Stmt, genericArgs?: VType[]) {
    if (stmt.kind.type !== "fn") {
        throw new Error();
    }
    const name = stmt.kind.ident;
    const base = `${name}_${stmt.id}`;

    if (genericArgs === undefined) {
        if (name === "main") {
            return "main";
        }
        return base;
    }

    const parts: string[] = [];
    for (const arg of genericArgs) {
        parts.push(vtypeMidPart(arg));
    }
    return `${base}_${parts.join("_")}`;
}
|
|
|
|
/**
 * Renders a type as the text fragment used inside a monomorphization id.
 *
 * Primitive-like types render as their tag, arrays as `array(<inner>)`,
 * structs as `struct(<structId>)` and functions as `fn(<fnStmtId>)`.
 *
 * @throws Error for `error` types, and for `generic` / `generic_args`
 *         types, which have no fragment.
 */
export function vtypeMidPart(vtype: VType): string {
    switch (vtype.type) {
        // Types that cannot appear in an id.
        case "generic_args":
        case "generic":
            throw new Error("cannot be monomorphized");
        case "error":
            throw new Error("error in type");

        // Composite types.
        case "fn":
            return "fn(" + vtype.fnStmtId + ")";
        case "struct":
            return "struct(" + vtype.structId + ")";
        case "array": {
            const inner = vtypeMidPart(vtype.inner);
            return "array(" + inner + ")";
        }

        // Types whose tag is the fragment.
        case "unknown":
        case "null":
        case "bool":
        case "int":
        case "string":
            return vtype.type;
    }
}
|