import { Expr, Stmt } from "./ast.ts";
import {
    AstVisitor,
    visitExpr,
    VisitRes,
    visitStmts,
} from "./ast_visitor.ts";
import { GenericArgsMap, VType } from "./vtype.ts";

/**
 * Walks the call graph starting at `main` and stamps out one specialized
 * function per distinct (function statement, generic arguments) pair that
 * is reachable from it.
 *
 * Functions that are never reached from `main` are not emitted.
 */
export class Monomorphizer {
    /** Next value handed out as `MonoFn.id`. */
    private fnIdCounter = 0;
    /** Every specialization produced so far, keyed by generated name. */
    private fns: MonoFnsMap = {};
    /** Call expression id -> generated name of the specialization it calls. */
    private callMap: MonoCallNameGenMap = {};
    /** Every `fn` statement in the program, keyed by statement id. */
    private allFns: Map<number, Stmt>;
    private entryFn: Stmt;

    /**
     * @param ast The whole program.
     * @throws Error if the program contains no function named `main`.
     */
    constructor(private ast: Stmt[]) {
        this.allFns = new AllFnsCollector().collect(this.ast);
        this.entryFn = findMain(this.allFns);
    }

    /**
     * Runs monomorphization from the entry function.
     *
     * @returns The specialized functions and the call -> specialization map.
     */
    public monomorphize(): MonoResult {
        this.monomorphizeFn(this.entryFn);
        return { monoFns: this.fns, callMap: this.callMap };
    }

    /**
     * Specializes `stmt` for `genericArgs` and, recursively, every function
     * it calls.
     *
     * The generated name is derived from the statement id, not from the
     * running counter, so asking for the same specialization twice yields
     * the same name. That is what lets the cache lookup below succeed, and
     * what makes (mutually) recursive functions terminate instead of being
     * stamped out forever.
     *
     * @param stmt A `fn` statement.
     * @param genericArgs Concrete types for `stmt`'s generic parameters, if
     * it has any.
     * @returns The (possibly already existing) specialization.
     */
    private monomorphizeFn(
        stmt: Stmt,
        genericArgs?: GenericArgsMap,
    ): MonoFn {
        const nameGen = monoFnNameGen(stmt.id, stmt, genericArgs);
        const existing = this.fns[nameGen];
        if (existing !== undefined) {
            return existing;
        }

        // Only allocate an id once we know this is a new specialization.
        const id = this.fnIdCounter;
        this.fnIdCounter += 1;

        // Register before descending so recursive calls find this entry.
        const monoFn: MonoFn = { id, nameGen, stmt, genericArgs };
        this.fns[nameGen] = monoFn;

        for (const call of new CallCollector().collect(stmt)) {
            const callee = this.monomorphizeCallee(call, genericArgs);
            this.callMap[call.id] = callee.nameGen;
        }
        return monoFn;
    }

    /**
     * Specializes the function targeted by `call`.
     *
     * @param call A call expression found in the body being specialized.
     * @param genericArgs Generic arguments of the enclosing specialization;
     * used to replace the enclosing function's type parameters when they
     * are forwarded to the callee.
     * @throws Error if the callee is not a statically known function.
     */
    private monomorphizeCallee(
        call: Expr,
        genericArgs?: GenericArgsMap,
    ): MonoFn {
        if (call.kind.type !== "call") {
            throw new Error("expected a call expression");
        }
        const subjectType = call.kind.subject.vtype;

        if (subjectType?.type === "fn") {
            const fn = this.lookupFn(subjectType.stmtId);
            if (subjectType.genericParams === undefined) {
                return this.monomorphizeFn(fn);
            }
            if (call.kind.genericArgs === undefined) {
                throw new Error(
                    "call to generic function without generic arguments",
                );
            }
            const monoArgs = resolveGenericArgs(
                call.kind.genericArgs,
                genericArgs,
            );
            return this.monomorphizeFn(fn, monoArgs);
        }

        if (subjectType?.type === "generic_spec") {
            const fnType = subjectType.subject;
            if (fnType.type !== "fn") {
                throw new Error("generic specialization of a non-function");
            }
            const monoArgs = resolveGenericArgs(
                subjectType.genericArgs,
                genericArgs,
            );
            return this.monomorphizeFn(this.lookupFn(fnType.stmtId), monoArgs);
        }

        throw new Error("call subject is not a statically known function");
    }

    /**
     * @throws Error if no `fn` statement with that id was collected.
     */
    private lookupFn(stmtId: number): Stmt {
        const fn = this.allFns.get(stmtId);
        if (fn === undefined) {
            throw new Error(`no function statement with id ${stmtId}`);
        }
        return fn;
    }
}

/** Result of {@link Monomorphizer.monomorphize}. */
export type MonoResult = {
    monoFns: MonoFnsMap;
    callMap: MonoCallNameGenMap;
};

/** Specializations keyed by their generated name. */
export type MonoFnsMap = { [nameGen: string]: MonoFn };

/** One specialized function. */
export type MonoFn = {
    id: number;
    nameGen: string;
    stmt: Stmt;
    genericArgs?: GenericArgsMap;
};

/** Call expression id -> generated name of the specialization it targets. */
export type MonoCallNameGenMap = { [exprId: number]: string };

/**
 * Builds the concrete generic arguments for a callee.
 *
 * Arguments that are already concrete are copied through. An argument that
 * is itself a type parameter of the enclosing function is replaced by the
 * concrete type that the enclosing specialization was given for it.
 *
 * @param callArgs Generic arguments as written at the call site.
 * @param context Generic arguments of the enclosing specialization.
 * @throws Error if a type parameter is forwarded but cannot be resolved
 * from `context`.
 */
function resolveGenericArgs(
    callArgs: GenericArgsMap,
    context?: GenericArgsMap,
): GenericArgsMap {
    const resolved: GenericArgsMap = {};
    for (const key in callArgs) {
        const vtype = callArgs[key];
        if (vtype.type !== "generic") {
            resolved[key] = vtype;
            continue;
        }
        if (context === undefined) {
            throw new Error(
                "generic parameter used outside of a generic function",
            );
        }
        const concrete = context[vtype.param.id];
        if (concrete === undefined) {
            throw new Error(
                `no concrete type for generic parameter ${vtype.param.id}`,
            );
        }
        resolved[key] = concrete;
    }
    return resolved;
}

/**
 * Generates the name of a specialization.
 *
 * `main` always keeps its name. Every other function is named after its
 * identifier, `id`, and (if present) a rendering of its generic arguments.
 *
 * @param id Disambiguates functions sharing an identifier. It must be the
 * same every time the same function is named, otherwise equal
 * specializations get different names.
 * @param stmt A `fn` statement.
 * @param genericArgs Concrete generic arguments, if any.
 * @throws Error if `stmt` is not a `fn` statement.
 */
function monoFnNameGen(
    id: number,
    stmt: Stmt,
    genericArgs?: GenericArgsMap,
): string {
    if (stmt.kind.type !== "fn") {
        throw new Error();
    }
    if (stmt.kind.ident === "main") {
        return "main";
    }
    if (genericArgs === undefined) {
        return `${stmt.kind.ident}_${id}`;
    }
    const args = Object.values(genericArgs)
        .map((arg) => vtypeNameGenPart(arg))
        .join("_");
    return `${stmt.kind.ident}_${id}_${args}`;
}

/**
 * Renders a concrete type as a fragment of a generated name.
 *
 * @throws Error for `error` types and for types that still contain an
 * unresolved generic (`generic`, `generic_spec`).
 */
function vtypeNameGenPart(vtype: VType): string {
    switch (vtype.type) {
        case "error":
            throw new Error("error in type");
        case "string":
        case "int":
        case "bool":
        case "null":
        case "unknown":
            return vtype.type;
        case "array":
            return `[${vtypeNameGenPart(vtype.inner)}]`;
        case "struct": {
            const fields = vtype.fields
                .map((field) =>
                    `${field.ident}, ${vtypeNameGenPart(field.vtype)}`
                )
                .join(", ");
            return `struct { ${fields} }`;
        }
        case "fn":
            return `fn(${vtype.stmtId})`;
        case "generic":
        case "generic_spec":
            throw new Error("cannot be monomorphized");
    }
}

/** Collects every `fn` statement in a program, keyed by statement id. */
class AllFnsCollector implements AstVisitor {
    private allFns = new Map<number, Stmt>();

    public collect(ast: Stmt[]): Map<number, Stmt> {
        visitStmts(ast, this);
        return this.allFns;
    }

    visitFnStmt(stmt: Stmt): VisitRes {
        if (stmt.kind.type !== "fn") {
            throw new Error();
        }
        this.allFns.set(stmt.id, stmt);
    }
}

/**
 * Returns the first collected function named `main`.
 *
 * @throws Error (after printing an explanation to stderr) if there is none.
 */
function findMain(fns: Map<number, Stmt>): Stmt {
    for (const stmt of fns.values()) {
        if (stmt.kind.type === "fn" && stmt.kind.ident === "main") {
            return stmt;
        }
    }
    console.error("error: cannot find function 'main'");
    console.error(apology);
    throw new Error("cannot find function 'main'");
}

/**
 * Collects the call expressions in one function body. Nested function
 * statements are not entered (`visitFnStmt` returns "stop").
 */
class CallCollector implements AstVisitor {
    private calls: Expr[] = [];

    public collect(fn: Stmt): Expr[] {
        if (fn.kind.type !== "fn") {
            throw new Error();
        }
        visitExpr(fn.kind.body, this);
        return this.calls;
    }

    visitFnStmt(_stmt: Stmt): VisitRes {
        return "stop";
    }

    visitCallExpr(expr: Expr): VisitRes {
        if (expr.kind.type !== "call") {
            throw new Error();
        }
        this.calls.push(expr);
    }
}

// Printed when the program has no `main`. The text is indented for
// readability here; only that four-space indentation is stripped, so the
// words keep their spacing when printed.
const apology = `
    Hear me out. Monomorphization, meaning the process inwich generic
    functions are stamped out into seperate specialized functions is
    actually really hard, and I have a really hard time right now,
    figuring out, how to do it in a smart way. To really explain it,
    let's imagine you have a function, you defined as a<T>(). For each
    call with seperate generics arguments given, such as a::<int>() and
    a::<string>(), a specialized function has to be 'stamped out', ie.
    created and put into the compilation with the rest of the program.
    Now to the reason as to why 'main' is needed. To do the
    monomorphization, we have to do it recursively. To explain this,
    imagine you have a generic function a and inside the body of a, you
    call another generic function such as b with the same generic type.
    This means that the monomorphization process of b depends on the
    monomorphization of a. What this essentially means, is that the
    monomorphization process works on the program as a call graph,
    meaning a graph or tree structure where each represents a function
    call to either another function or a recursive call to the function
    itself. But a problem arises from doing it this way, which is that a
    call graph will need an entrypoint. The language, as it is
    currently, does not really require a 'main'-function. Or maybe it
    does, but that's beside the point. The point is that we need a main
    function, to be the entry point for the call graph. The
    monomorphization process then runs through the program from that
    entry point. This means that each function we call, will itself be
    monomorphized and added to the compilation. It also means that
    functions that are not called, will also not be added to the
    compilation. This essentially eliminates uncalled/dead functions. Is
    this particularly smart to do in such a high level part of the
    compilation process? I don't know. It's obvious that we can't just
    use every function as an entry point in the call graph, because
    we're actively added new functions. Additionally, with generic
    functions, we don't know, if they're the entry point, what generic
    arguments, they should be monomorphized with. We could do
    monomorphization the same way C++ does it, where all non-generic
    functions before monomorphization are treated as entry points in the
    call graph. But this has the drawback that generic and non-generic
    functions are treated differently, which has many underlying
    drawbacks, especially pertaining to the amount of work needed to
    handle both in all proceeding steps of the compiler. Anyways, I just
    wanted to yap and complain about the way generics and
    monomorphization has made the compiler 100x more complicated, and
    that I find it really hard to implement in a way, that is not either
    too simplistic or so complicated and advanced I'm too dumb to
    implement it. So if you would be so kind as to make it clear to the
    compiler, what function it should designate as the entry point to
    the call graph, it will use for monomorphization, that would be very
    kind of you. The way you do this, is by added or selecting one of
    your current functions and giving it the name of 'main'. This is
    spelled m-a-i-n. The word is synonemous with the words primary and
    principle. The name is meant to designate the entry point into the
    program, which is why the monomorphization process uses this
    specific function as the entry point into the call graph, it
    generates. So if you would be so kind as to do that, that would
    really make my day. In any case, keep hacking ferociously on
    whatever you're working on. I have monomorphizer to implement. See
    ya. -Your favorite compiler girl <3
`.replaceAll("    ", "").trim();