slige/compiler/mfg.ts

245 lines
9.4 KiB
TypeScript
Raw Normal View History

2024-12-25 04:19:32 +00:00
// monomorphized function (ast-)graphs
import { Expr, Stmt } from "./ast.ts";
import { AstVisitor, visitExpr, VisitRes, visitStmts } from "./ast_visitor.ts";
import { VType } from "./vtype.ts";
/**
 * One entry in the result of monomorphization: a function statement paired
 * with the identifier ("mid") it is registered under.
 */
export type MonomorphizedFn = {
// Key the entry is stored under by `Monomorphizer`; produced by `fnCallMid`.
mid: string;
// The `fn` statement this entry refers to.
stmt: Stmt;
// Generic arguments associated with this entry; absent for plain `fn` calls.
genericArgs?: VType[];
};
/**
 * Collects every `fn` statement in `ast`, locates `main`, and walks the calls
 * reachable from it.
 *
 * @param ast Statements to scan for function definitions.
 * @returns The entries registered by the `Monomorphizer`, in insertion order.
 * @throws Error when no function named `main` exists (raised by `findMain`).
 */
export function monomorphizeFunctionGraphs(ast: Stmt[]): MonomorphizedFn[] {
    const fnsById = new AllFnsCollector().collect(ast);
    const entryPoint = findMain(fnsById);
    const monomorphizer = new Monomorphizer(fnsById);
    const reachable = monomorphizer.monomorphize(entryPoint);
    return [...reachable.values()];
}
/**
 * Explanation logged when the program has no `main` function.
 *
 * Stored as one string per output line and joined with newlines, so the
 * message never depends on how this source file is indented and no
 * whitespace stripping (which previously removed every space) is needed.
 */
const MISSING_MAIN_EXPLANATION = [
    "Hear me out. Monomorphization, meaning the process",
    "inwich generic functions are stamped out into seperate",
    "specialized functions is actually really hard, and I",
    "have a really hard time right now, figuring out, how",
    "to do it in a smart way. To really explain it, let's",
    "imagine you have a function, you defined as a<T>().",
    "For each call with seperate generics arguments given,",
    "such as a::<int>() and a::<string>(), a specialized",
    "function has to be 'stamped out', ie. created and put",
    "into the compilation with the rest of the program. Now",
    "to the reason as to why 'main' is needed. To do the",
    "monomorphization, we have to do it recursively. To",
    "explain this, imagine you have a generic function a<T>",
    "and inside the body of a<T>, you call another generic",
    "function such as b<T> with the same generic type. This",
    "means that the monomorphization process of b<T> depends",
    "on the monomorphization of a<T>. What this essentially",
    "means, is that the monomorphization process works on",
    "the program as a call graph, meaning a graph or tree",
    "structure where each represents a function call to",
    "either another function or a recursive call to the",
    "function itself. But a problem arises from doing it",
    "this way, which is that a call graph will need an",
    "entrypoint. The language, as it is currently, does",
    "not really require a 'main'-function. Or maybe it",
    "does, but that's beside the point. The point is that",
    "we need a main function, to be the entry point for",
    "the call graph. The monomorphization process then",
    "runs through the program from that entry point. This",
    "means that each function we call, will itself be",
    "monomorphized and added to the compilation. It also",
    "means that functions that are not called, will also",
    "not be added to the compilation. This essentially",
    "eliminates uncalled/dead functions. Is this",
    "particularly smart to do in such a high level part",
    "of the compilation process? I don't know. It's",
    "obvious that we can't just use every function as",
    "an entry point in the call graph, because we're",
    "actively added new functions. Additionally, with",
    "generic functions, we don't know, if they're the",
    "entry point, what generic arguments, they should",
    "be monomorphized with. We could do monomorphization",
    "the same way C++ does it, where all non-generic",
    "functions before monomorphization are treated as",
    "entry points in the call graph. But this has the",
    "drawback that generic and non-generic functions",
    "are treated differently, which has many underlying",
    "drawbacks, especially pertaining to the amount of",
    "work needed to handle both in all proceeding steps",
    "of the compiler. Anyways, I just wanted to yap and",
    "complain about the way generics and monomorphization",
    "has made the compiler 100x more complicated, and",
    "that I find it really hard to implement in a way,",
    "that is not either too simplistic or so complicated",
    "and advanced I'm too dumb to implement it. So if",
    "you would be so kind as to make it clear to the",
    "compiler, what function it should designate as",
    "the entry point to the call graph, it will use",
    "for monomorphization, that would be very kind of",
    "you. The way you do this, is by added or selecting",
    "one of your current functions and giving it the",
    "name of 'main'. This is spelled m-a-i-n. The word",
    "is synonemous with the words primary and principle.",
    "The name is meant to designate the entry point into",
    "the program, which is why the monomorphization",
    "process uses this specific function as the entry",
    "point into the call graph, it generates. So if you",
    "would be so kind as to do that, that would really",
    "make my day. In any case, keep hacking ferociously",
    "on whatever you're working on. I have monomorphizer",
    "to implement. See ya. -Your favorite compiler girl <3",
].join("\n");

/**
 * Returns the first `fn` statement named `main` among the collected functions.
 *
 * @param fns Function statements keyed by statement id.
 * @returns The statement of the `main` function.
 * @throws Error("cannot find function 'main'") when no such function exists;
 *   a short error line and the long explanation are logged with
 *   `console.error` first.
 */
function findMain(fns: Map<number, Stmt>): Stmt {
    // Map iteration follows insertion order, so the first match wins.
    for (const stmt of fns.values()) {
        if (stmt.kind.type === "fn" && stmt.kind.ident === "main") {
            return stmt;
        }
    }
    console.error("error: cannot find function 'main'");
    console.error(MISSING_MAIN_EXPLANATION);
    throw new Error("cannot find function 'main'");
}
/**
 * AST visitor that gathers every visited `fn` statement, keyed by its
 * statement id.
 */
class AllFnsCollector implements AstVisitor {
    private fnsById = new Map<number, Stmt>();

    /**
     * Visitor hook invoked for function statements; records the statement
     * under `stmt.id`. Throws if handed a statement that is not a `fn`.
     */
    visitFnStmt(stmt: Stmt): VisitRes {
        const isFn = stmt.kind.type === "fn";
        if (!isFn) {
            throw new Error();
        }
        this.fnsById.set(stmt.id, stmt);
    }

    /**
     * Runs the visitor over `ast` and returns the (shared, mutable) map of
     * functions seen so far.
     */
    public collect(ast: Stmt[]): Map<number, Stmt> {
        visitStmts(ast, this);
        return this.fnsById;
    }
}
/**
 * Walks the call graph starting at an entry function and registers one
 * `MonomorphizedFn` per distinct callee "mid" (see `fnCallMid`).
 *
 * NOTE(review): the entry function passed to `monomorphize` is itself only
 * registered if some reachable call targets it; confirm whether it should be
 * registered explicitly.
 */
class Monomorphizer {
    private monomorphizedFns = new Map<string, MonomorphizedFn>();

    /**
     * @param allFns Every known `fn` statement, keyed by statement id.
     */
    public constructor(private allFns: Map<number, Stmt>) {}

    /**
     * Registers every function reachable through calls from `mainFn`.
     *
     * @returns The internal map of registered functions, keyed by mid.
     */
    public monomorphize(mainFn: Stmt): Map<string, MonomorphizedFn> {
        this.monomorphizeFn(mainFn);
        return this.monomorphizedFns;
    }

    /**
     * Visits every call expression in the body of `stmt` and registers each
     * callee.
     *
     * `_genericArgs` carries the generic arguments `stmt` was instantiated
     * with; it is not consulted yet.
     *
     * @throws Error when a call's subject is neither a function nor a
     *   function with generic arguments applied.
     */
    private monomorphizeFn(stmt: Stmt, _genericArgs?: VType[]) {
        const calls = new FnBodyCallCollector().collect(stmt);
        for (const expr of calls) {
            if (expr.kind.type !== "call") {
                throw new Error("expected a call expression");
            }
            const vtype = expr.kind.subject.vtype;
            if (vtype === undefined) {
                throw new Error("call subject has no resolved type");
            }
            // Every call in the body must be processed, so each branch falls
            // through to the next loop iteration instead of returning.
            if (vtype.type === "fn") {
                this.registerCallee(expr, vtype.fnStmtId);
            } else if (vtype.type === "generic_args") {
                if (vtype.subject.type !== "fn") {
                    throw new Error("generic arguments applied to a non-function");
                }
                this.registerCallee(
                    expr,
                    vtype.subject.fnStmtId,
                    vtype.genericArgs,
                );
            } else {
                throw new Error(`cannot monomorphize call on '${vtype.type}'`);
            }
        }
    }

    /**
     * Registers the function called by `call` (unless its mid is already
     * known) and then recurses into that function's body.
     *
     * The entry is inserted before recursing so that recursive and mutually
     * recursive functions terminate.
     */
    private registerCallee(
        call: Expr,
        fnStmtId: number,
        genericArgs?: VType[],
    ) {
        const callee = this.allFns.get(fnStmtId);
        if (callee === undefined || callee.kind.type !== "fn") {
            throw new Error(`no function statement with id ${fnStmtId}`);
        }
        const mid = fnCallMid(call, callee);
        if (this.monomorphizedFns.has(mid)) {
            return;
        }
        // Record the generic arguments of THIS call (the ones the mid was
        // built from), not those of the enclosing function.
        const entry: MonomorphizedFn = genericArgs === undefined
            ? { mid, stmt: callee }
            : { mid, stmt: callee, genericArgs };
        this.monomorphizedFns.set(mid, entry);
        this.monomorphizeFn(callee, genericArgs);
    }
}
/**
 * AST visitor that gathers the call expressions visited inside a single
 * function body.
 */
class FnBodyCallCollector implements AstVisitor {
    private found: Expr[] = [];

    /**
     * Visitor hook invoked for call expressions; appends the expression to
     * the result list. Throws if handed an expression that is not a `call`.
     */
    visitCallExpr(expr: Expr): VisitRes {
        const isCall = expr.kind.type === "call";
        if (!isCall) {
            throw new Error();
        }
        this.found.push(expr);
    }

    /**
     * Visits the body of the `fn` statement `stmt` and returns the calls
     * gathered so far. Throws if `stmt` is not a `fn` statement.
     */
    public collect(stmt: Stmt): Expr[] {
        const { kind } = stmt;
        if (kind.type !== "fn") {
            throw new Error();
        }
        visitExpr(kind.body, this);
        return this.found;
    }
}
/**
 * Computes the monomorphization id ("mid") of the function targeted by a call
 * expression.
 *
 * @param expr The call expression; its subject's type decides whether generic
 *   arguments take part in the id.
 * @param stmt The `fn` statement being called.
 * @returns The id produced by `fnStmtMid`, including the call's generic
 *   arguments when the subject has them applied.
 * @throws Error when `expr` is not a call, its subject has no resolved type,
 *   or the subject is not a function (optionally with generic arguments).
 */
export function fnCallMid(expr: Expr, stmt: Stmt): string {
    if (expr.kind.type !== "call") {
        throw new Error("expected a call expression");
    }
    const vtype = expr.kind.subject.vtype;
    if (vtype === undefined) {
        throw new Error("call subject has no resolved type");
    }
    switch (vtype.type) {
        case "fn":
            return fnStmtMid(stmt);
        case "generic_args":
            if (vtype.subject.type !== "fn") {
                throw new Error("generic arguments applied to a non-function");
            }
            return fnStmtMid(stmt, vtype.genericArgs);
        default:
            throw new Error(`cannot compute mid for call on '${vtype.type}'`);
    }
}
/**
 * Builds the monomorphization id ("mid") for a `fn` statement.
 *
 * - Without generic arguments: `"main"` for the function named `main`,
 *   otherwise `<ident>_<id>`.
 * - With generic arguments: `<ident>_<id>_<args>`, where `<args>` is each
 *   argument rendered by `vtypeMidPart` and joined with `_`.
 *
 * @throws Error when `stmt` is not a `fn` statement.
 */
export function fnStmtMid(stmt: Stmt, genericArgs?: VType[]) {
    if (stmt.kind.type !== "fn") {
        throw new Error();
    }
    const name = stmt.kind.ident;
    const base = `${name}_${stmt.id}`;
    if (genericArgs === undefined) {
        // The entry point keeps its plain name.
        return name === "main" ? "main" : base;
    }
    const renderedArgs: string[] = [];
    for (const arg of genericArgs) {
        renderedArgs.push(vtypeMidPart(arg));
    }
    return `${base}_${renderedArgs.join("_")}`;
}
/**
 * Renders a type as the text fragment used inside a monomorphization id.
 *
 * Primitive-like types render as their tag; arrays, structs and functions
 * render as `array(<inner>)`, `struct(<structId>)` and `fn(<fnStmtId>)`.
 *
 * @throws Error("error in type") for the `error` type.
 * @throws Error("cannot be monomorphized") for `generic` and `generic_args`.
 */
export function vtypeMidPart(vtype: VType): string {
    switch (vtype.type) {
        // Types that must never reach id generation.
        case "generic_args":
        case "generic":
            throw new Error("cannot be monomorphized");
        case "error":
            throw new Error("error in type");

        // Types identified by a referenced declaration.
        case "fn": {
            const { fnStmtId } = vtype;
            return `fn(${fnStmtId})`;
        }
        case "struct": {
            const { structId } = vtype;
            return `struct(${structId})`;
        }

        // Composite type: render the element type recursively.
        case "array": {
            const inner = vtypeMidPart(vtype.inner);
            return `array(${inner})`;
        }

        // Everything else is fully described by its tag.
        case "unknown":
        case "null":
        case "bool":
        case "int":
        case "string":
            return vtype.type;
    }
}