From b8b9a08229121ed6a2f19331d85b60ede27ab08f Mon Sep 17 00:00:00 2001 From: Mikkel Kongsted Date: Fri, 15 Nov 2024 15:20:49 +0100 Subject: [PATCH] parser --- deno.lock | 17 +++ example.slg | 2 + src/Lexer.ts | 96 ++++++++++-- src/Parser.ts | 406 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/ast.ts | 49 ++++++ src/main.ts | 11 +- 6 files changed, 563 insertions(+), 18 deletions(-) create mode 100644 deno.lock mode change 100644 => 100755 example.slg create mode 100644 src/Parser.ts create mode 100644 src/ast.ts diff --git a/deno.lock b/deno.lock new file mode 100644 index 0000000..5431e35 --- /dev/null +++ b/deno.lock @@ -0,0 +1,17 @@ +{ + "version": "4", + "specifiers": { + "npm:@types/node@*": "22.5.4" + }, + "npm": { + "@types/node@22.5.4": { + "integrity": "sha512-FDuKUJQm/ju9fT/SeX/6+gBzoPzlVCzfzmGkwKvRHQVxi4BntVbyIwf6a4Xn62mrvndLiml6z/UBXIdEVjQLXg==", + "dependencies": [ + "undici-types" + ] + }, + "undici-types@6.19.8": { + "integrity": "sha512-ve2KP6f/JnbPBFyobGHuerC9g1FYGn/F8n1LWTwNxCEzd6IfqTwUQcNXgEtmmQ6DlRrC1hrSrBnCZPokRrDHjw==" + } + } +} diff --git a/example.slg b/example.slg old mode 100644 new mode 100755 index 4eac52e..19ea8fc --- a/example.slg +++ b/example.slg @@ -19,6 +19,8 @@ else { println(":o"); } +oopjoipjioj + loop { let i = 0; diff --git a/src/Lexer.ts b/src/Lexer.ts index 6deae17..63fca6c 100644 --- a/src/Lexer.ts +++ b/src/Lexer.ts @@ -17,15 +17,7 @@ export class Lexer { this.step(); return this.next(); } - if (this.test("/")) { - this.step() - if (this.test("/")) { - while (!this.done() && !this.test("\n")) - this.step(); - return this.token("//", pos) - } - return this.token("/", pos) - } + if (this.test(/[a-zA-Z_]/)) { let value = ""; while (!this.done() && this.test(/[a-zA-Z0-9_]/)) { @@ -41,7 +33,7 @@ export class Lexer { return { ...this.token("ident", pos), identValue: value }; } } - if (this.test(/[0-9]/)) { + if (this.test(/[1-9]/)) { let textValue = ""; while (!this.done() && this.test(/[0-9]/)) { textValue += 
this.current(); @@ -77,23 +69,105 @@ export class Lexer { this.step(); return { ...this.token("string", pos), stringValue: value }; } - if (this.test(/[\+\{\};=]/)) { + if (this.test(/[\+\{\};=\-\*\(\)\.,:;\[\]>" && !this.done() && this.test("=")) { + this.step(); + return this.token(">=", pos); + } + if (first === "-" && !this.done()) { + if (this.test(">")) { + this.step(); + return this.token("->", pos); + } + if (this.test("=")) { + this.step() + return this.token("-=", pos); + } + } + if (first === "!" && !this.done() && this.test("=")) { + this.step(); + return this.token("!=", pos); + } + if (first === "+" && !this.done() && this.test("=")) { + this.step(); + return this.token("+=", pos); + } return this.token(first, pos); } + if (this.test("/")) { + this.step() + if (this.test("/")) { + while (!this.done() && !this.test("\n")) + this.step(); + return this.token("//", pos) + } + return this.token("/", pos) + } + if (this.test("false")) { + this.step(); + return this.token("false", pos); + } + if (this.test("true")) { + this.step(); + return this.token("true", pos); + } + if (this.test("null")) { + this.step(); + return this.token("null", pos); + } + if (this.test("or")) { + this.step(); + return this.token("or", pos); + } + if (this.test("and")) { + this.step(); + return this.token("and", pos); + } + if (this.test("not")) { + this.step(); + return this.token("not", pos); + } + if (this.test("loop")) { + this.step(); + return this.token("loop", pos); + } + if (this.test("break")) { + this.step(); + return this.token("break", pos); + } + if (this.test("let")) { + this.step(); + return this.token("let", pos); + } + if (this.test("fn")) { + this.step(); + return this.token("fn", pos); + } + if (this.test("return")) { + this.step(); + return this.token("return", pos); + } console.error(`Lexer: illegal character '${this.current()}' at ${pos.line}:${pos.col}`); this.step(); return this.next(); } private done(): boolean { return this.index >= this.text.length; } + 
private current(): string { return this.text[this.index]; } + public currentPos(): Pos { return this.pos(); } + private step() { if (this.done()) return;
diff --git a/src/Parser.ts b/src/Parser.ts
new file mode 100644
index 0000000..b4681da
--- /dev/null
+++ b/src/Parser.ts
@@ -0,0 +1,473 @@
+import { Expr, ExprKind, Param, Stmt, StmtKind, BinaryType} from "./ast.ts";
+import { Lexer } from "./Lexer.ts";
+import { Pos, Token } from "./Token.ts";
+
+/**
+ * Binary operator token types. They are parsed in prefix form: the operator
+ * token comes first, followed by its two operands (see `parseBinary`).
+ */
+const BINARY_TYPES: readonly BinaryType[] = [
+    "+", "*", "==", "-", "/", "!=", "<", ">", "<=", ">=", "or", "and",
+];
+
+/**
+ * Recursive-descent parser that turns the tokens produced by a `Lexer` into
+ * `Stmt` and `Expr` AST nodes.
+ *
+ * Syntax errors are reported through `report` and represented by
+ * `{ type: "error" }` nodes rather than thrown.
+ */
+export class Parser {
+    private currentToken: Token | null;
+    private nextNodeId = 0;
+
+    public constructor(private lexer: Lexer) {
+        this.currentToken = lexer.next();
+    }
+
+    private step() { this.currentToken = this.lexer.next(); }
+    private done(): boolean { return this.currentToken == null; }
+    private current(): Token { return this.currentToken!; }
+
+    /** Position of the current token, or the lexer's position once input is exhausted. */
+    private pos(): Pos {
+        if (this.done())
+            return this.lexer.currentPos();
+        return this.current().pos;
+    }
+
+    /** True when there is a current token and its type equals `type`. */
+    private test(type: string): boolean {
+        return !this.done() && this.current().type === type;
+    }
+
+    /** Prints a diagnostic with console.error, like the lexer's diagnostics. */
+    private report(msg: string, pos = this.pos()) {
+        console.error(`Parser: ${msg} at ${pos.line}:${pos.col}`);
+    }
+
+    /** Creates a statement node with a fresh node id. */
+    private stmt(kind: StmtKind, pos: Pos): Stmt {
+        const id = this.nextNodeId;
+        this.nextNodeId += 1;
+        return { kind, pos, id };
+    }
+
+    /** Creates an expression node with a fresh node id. */
+    private expr(kind: ExprKind, pos: Pos): Expr {
+        const id = this.nextNodeId;
+        this.nextNodeId += 1;
+        return { kind, pos, id };
+    }
+
+    /** Parses an expression that starts with `{`, `if` or `loop`. */
+    private parseMultiLineBlockExpr(): Expr {
+        const pos = this.pos();
+        if (this.test("{"))
+            return this.parseBlock();
+        if (this.test("if"))
+            return this.parseIf();
+        if (this.test("loop"))
+            return this.parseLoop();
+        this.report("expected expr");
+        return this.expr({ type: "error" }, pos);
+    }
+
+    /** Parses a `let`, `return` or `break` statement, without its trailing `;`. */
+    private parseSingleLineBlockStmt(): Stmt {
+        const pos = this.pos();
+        if (this.test("let"))
+            return this.parseLet();
+        if (this.test("return"))
+            return this.parseReturn();
+        if (this.test("break"))
+            return this.parseBreak();
+        this.report("expected stmt");
+        return this.stmt({ type: "error" }, pos);
+    }
+
+    /** Consumes a `;`, or reports its absence without consuming anything. */
+    private eatSemicolon() {
+        if (!this.test(";")) {
+            this.report("expected ';'");
+            return;
+        }
+        this.step();
+    }
+
+    /** Parses one expression; delegates to `parsePrefix`. */
+    public parseExpr(): Expr {
+        return this.parsePrefix();
+    }
+
+    /**
+     * Parses a `{ ... }` block; the current token must be the opening `{`.
+     *
+     * A final expression that is directly followed by `}` becomes the block's
+     * `expr`; everything else is collected into `stmts`.
+     */
+    public parseBlock(): Expr {
+        const pos = this.pos();
+        this.step();
+        const stmts: Stmt[] = [];
+        while (!this.done()) {
+            if (this.test("}")) {
+                this.step();
+                return this.expr({ type: "block", stmts }, pos);
+            } else if (this.test("fn")) {
+                stmts.push(this.parseFn());
+            } else if (this.test("let") || this.test("return") || this.test("break")) {
+                // Same handling as in parseStmts: the statement parsers leave
+                // the terminating ';' to the caller.
+                stmts.push(this.parseSingleLineBlockStmt());
+                this.eatSemicolon();
+            } else if (this.test("{") || this.test("if") || this.test("loop")) {
+                const expr = this.parseMultiLineBlockExpr();
+                if (this.test("}")) {
+                    this.step();
+                    return this.expr({ type: "block", stmts, expr }, pos);
+                }
+                stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
+            } else {
+                const expr = this.parseExpr();
+                if (this.test("=")) {
+                    this.step();
+                    const value = this.parseExpr();
+                    this.eatSemicolon();
+                    // Positioned at the assignment itself, not at the block's '{'.
+                    stmts.push(this.stmt({ type: "assign", subject: expr, value }, expr.pos));
+                } else if (this.test(";")) {
+                    this.step();
+                    stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
+                } else if (this.test("}")) {
+                    this.step();
+                    return this.expr({ type: "block", stmts, expr }, pos);
+                } else {
+                    this.report("expected ';' or '}'");
+                    return this.expr({ type: "error" }, pos);
+                }
+            }
+        }
+        this.report("expected '}'");
+        return this.expr({ type: "error" }, pos);
+    }
+
+    /** Parses top-level statements until the token stream is exhausted. */
+    public parseStmts(): Stmt[] {
+        const stmts: Stmt[] = [];
+        while (!this.done()) {
+            if (this.test("fn")) {
+                stmts.push(this.parseFn());
+            } else if (this.test("let") || this.test("return") || this.test("break")) {
+                stmts.push(this.parseSingleLineBlockStmt());
+                this.eatSemicolon();
+            } else if (this.test("{") || this.test("if") || this.test("loop")) {
+                const expr = this.parseMultiLineBlockExpr();
+                stmts.push(this.stmt({ type: "expr", expr }, expr.pos));
+            } else {
+                stmts.push(this.parseAssign());
+                this.eatSemicolon();
+            }
+        }
+        return stmts;
+    }
+
+    /** Parses `fn ident(params) { ... }`; the current token must be `fn`. */
+    public parseFn(): Stmt {
+        const pos = this.pos();
+        this.step();
+        if (!this.test("ident")) {
+            this.report("expected ident");
+            return this.stmt({ type: "error" }, pos);
+        }
+        const ident = this.current().identValue!;
+        this.step();
+        if (!this.test("(")) {
+            this.report("expected '('");
+            return this.stmt({ type: "error" }, pos);
+        }
+        const params = this.parseFnParams();
+        if (!this.test("{")) {
+            this.report("expected block");
+            return this.stmt({ type: "error" }, pos);
+        }
+        const body = this.parseBlock();
+        return this.stmt({ type: "fn", ident, params, body }, pos);
+    }
+
+    /**
+     * Parses a parenthesised, comma-separated parameter list (a trailing comma
+     * is accepted); the current token must be the opening `(`.
+     */
+    public parseFnParams(): Param[] {
+        this.step();
+        if (this.test(")")) {
+            this.step();
+            return [];
+        }
+        const params: Param[] = [];
+        const paramResult = this.parseParam();
+        if (!paramResult.ok)
+            return [];
+        params.push(paramResult.value);
+        while (this.test(",")) {
+            this.step();
+            if (this.test(")"))
+                break;
+            const paramResult = this.parseParam();
+            if (!paramResult.ok)
+                return [];
+            params.push(paramResult.value);
+        }
+        if (!this.test(")")) {
+            this.report("expected ')'");
+            return params;
+        }
+        this.step();
+        return params;
+    }
+
+    /** Parses a single parameter, which is an identifier token. */
+    public parseParam(): { ok: true, value: Param } | { ok: false } {
+        const pos = this.pos();
+        if (this.test("ident")) {
+            const ident = this.current().identValue!;
+            this.step();
+            return { ok: true, value: { ident, pos } };
+        }
+        this.report("expected param");
+        return { ok: false };
+    }
+
+    /** Parses `let param = expr` without the trailing `;`. */
+    public parseLet(): Stmt {
+        const pos = this.pos();
+        this.step();
+        const paramResult = this.parseParam();
+        if (!paramResult.ok)
+            return this.stmt({ type: "error" }, pos);
+        const param = paramResult.value;
+        if (!this.test("=")) {
+            this.report("expected '='");
+            return this.stmt({ type: "error" }, pos);
+        }
+        this.step();
+        const value = this.parseExpr();
+        return this.stmt({ type: "let", param, value }, pos);
+    }
+
+    /** Parses `subject = value`, or a plain expression statement when no `=` follows. */
+    public parseAssign(): Stmt {
+        const pos = this.pos();
+        const subject = this.parseExpr();
+        if (!this.test("=")) {
+            return this.stmt({ type: "expr", expr: subject }, pos);
+        }
+        this.step();
+        const value = this.parseExpr();
+        return this.stmt({ type: "assign", subject, value }, pos);
+    }
+
+    /** Parses `return` with an optional value, without the trailing `;`. */
+    public parseReturn(): Stmt {
+        const pos = this.pos();
+        this.step();
+        if (this.test(";")) {
+            return this.stmt({ type: "return" }, pos);
+        }
+        const expr = this.parseExpr();
+        return this.stmt({ type: "return", expr }, pos);
+    }
+
+    /** Parses `break` with an optional value, without the trailing `;`. */
+    public parseBreak(): Stmt {
+        const pos = this.pos();
+        this.step();
+        if (this.test(";")) {
+            return this.stmt({ type: "break" }, pos);
+        }
+        const expr = this.parseExpr();
+        return this.stmt({ type: "break", expr }, pos);
+    }
+
+    /** Parses `loop { ... }`; the current token must be `loop`. */
+    public parseLoop(): Expr {
+        const pos = this.pos();
+        this.step();
+        if (!this.test("{")) {
+            this.report("expected block");
+            return this.expr({ type: "error" }, pos);
+        }
+        // Parse the body as a block directly so that a postfix operator after
+        // the closing '}' is not absorbed into the loop body.
+        const body = this.parseBlock();
+        return this.expr({ type: "loop", body }, pos);
+    }
+
+    /** Parses `if cond { ... }` with an optional `else { ... }` or `else if ...`. */
+    public parseIf(): Expr {
+        const pos = this.pos();
+        this.step();
+        const cond = this.parseExpr();
+        if (!this.test("{")) {
+            this.report("expected block");
+            return this.expr({ type: "error" }, pos);
+        }
+        const truthy = this.parseBlock();
+        if (!this.test("else")) {
+            return this.expr({ type: "if", cond, truthy }, pos);
+        }
+        this.step();
+        if (this.test("if")) {
+            const falsy = this.parseIf();
+            return this.expr({ type: "if", cond, truthy, falsy }, pos);
+        }
+        if (!this.test("{")) {
+            this.report("expected block");
+            return this.expr({ type: "error" }, pos);
+        }
+        const falsy = this.parseBlock();
+        return this.expr({ type: "if", cond, truthy, falsy }, pos);
+    }
+
+    /**
+     * Parses a prefix expression: `not expr`, a prefix binary operation, or a
+     * postfix expression.
+     */
+    public parsePrefix(): Expr {
+        const pos = this.pos();
+        if (this.test("not")) {
+            this.step();
+            const subject = this.parsePrefix();
+            return this.expr({ type: "unary", unaryType: "not", subject }, pos);
+        }
+        for (const binaryType of BINARY_TYPES) {
+            const binary = this.parseBinary(binaryType, pos);
+            if (binary !== undefined)
+                return binary;
+        }
+        return this.parsePostfix();
+    }
+
+    /**
+     * Parses `binaryType left right` when the current token is `binaryType`.
+     *
+     * @returns the binary expression, or `undefined` (consuming nothing) when
+     * the current token is a different one.
+     */
+    public parseBinary(binaryType: BinaryType, pos: Pos): Expr | undefined {
+        if (!this.test(binaryType))
+            return undefined;
+        this.step();
+        const left = this.parsePrefix();
+        const right = this.parsePrefix();
+        return this.expr({ type: "binary", binaryType, left, right }, pos);
+    }
+
+    /** Parses an operand followed by any number of `.field`, `[index]` and `(args)` suffixes. */
+    public parsePostfix(): Expr {
+        let subject = this.parseOperand();
+        while (true) {
+            const pos = this.pos();
+            if (this.test(".")) {
+                this.step();
+                if (!this.test("ident")) {
+                    this.report("expected ident");
+                    return this.expr({ type: "error" }, pos);
+                }
+                const value = this.current().identValue!;
+                this.step();
+                subject = this.expr({ type: "field", subject, value }, pos);
+                continue;
+            }
+            if (this.test("[")) {
+                this.step();
+                const value = this.parseExpr();
+                if (!this.test("]")) {
+                    this.report("expected ']'");
+                    return this.expr({ type: "error" }, pos);
+                }
+                this.step();
+                subject = this.expr({ type: "index", subject, value }, pos);
+                continue;
+            }
+            if (this.test("(")) {
+                this.step();
+                const args: Expr[] = [];
+                if (!this.test(")")) {
+                    args.push(this.parseExpr());
+                    while (this.test(",")) {
+                        this.step();
+                        if (this.test(")"))
+                            break;
+                        args.push(this.parseExpr());
+                    }
+                }
+                if (!this.test(")")) {
+                    this.report("expected ')'");
+                    return this.expr({ type: "error" }, pos);
+                }
+                this.step();
+                subject = this.expr({ type: "call", subject, args }, pos);
+                continue;
+            }
+            break;
+        }
+        return subject;
+    }
+
+    /**
+     * Parses a literal, identifier, parenthesised group, block, `if` or `loop`.
+     * On any other token an error is reported and the token is skipped.
+     */
+    public parseOperand(): Expr {
+        const pos = this.pos();
+        if (this.test("ident")) {
+            const value = this.current().identValue!;
+            this.step();
+            return this.expr({ type: "ident", value }, pos);
+        }
+        if (this.test("int")) {
+            const value = this.current().intValue!;
+            this.step();
+            return this.expr({ type: "int", value }, pos);
+        }
+        if (this.test("string")) {
+            const value = this.current().stringValue!;
+            this.step();
+            return this.expr({ type: "string", value }, pos);
+        }
+        if (this.test("false")) {
+            this.step();
+            return this.expr({ type: "bool", value: false }, pos);
+        }
+        if (this.test("true")) {
+            this.step();
+            return this.expr({ type: "bool", value: true }, pos);
+        }
+        if (this.test("null")) {
+            this.step();
+            return this.expr({ type: "null"}, pos);
+        }
+        if (this.test("(")) {
+            this.step();
+            const expr = this.parseExpr();
+            if (!this.test(")")) {
+                this.report("expected ')'");
+                return this.expr({ type: "error" }, pos);
+            }
+            this.step();
+            return this.expr({ type: "group", expr }, pos);
+        }
+        if (this.test("{"))
+            return this.parseBlock();
+        if (this.test("if"))
+            return this.parseIf();
+        if (this.test("loop"))
+            return this.parseLoop();
+
+        this.report("expected expr", pos);
+        this.step();
+        return this.expr({ type: "error" }, pos);
+    }
+}
diff --git a/src/ast.ts b/src/ast.ts new file mode 100644 index 0000000..f6b9d89 --- /dev/null +++ b/src/ast.ts @@ -0,0 +1,49 @@ +import { Pos } from "./Token.ts"; + +type UnaryType = "not"; +export type BinaryType = "+" | "*" | "==" | "-" | "/" | "!=" | "<" | ">" | "<=" | ">=" | "or" | "and"; + +export type Param = { + ident: string, + pos: Pos, +}; + +export type Stmt = { + kind: StmtKind, + pos: Pos, + id: number, +}; + +export type StmtKind = + | { type: "error" } + | { type: "break", expr?: Expr } + | { type: "return", expr?: Expr } + | { type: "fn", ident: string, params: Param[], body: Expr } + | { type: "let", param: Param, value: Expr } + | { type: "assign", subject: Expr, value: Expr } + | { type: "expr", expr: Expr } + ; + +export type Expr = { + kind: ExprKind, + pos: Pos, + id: number, +}; + +export type ExprKind = + | { type: "error" } + | { type: "int", value: number } + | { type: "string", value: string } + | { type: "ident", value: string } + | { type: "group", expr: Expr } + | { type: "field", subject: Expr, value: string } + | { type: "index", subject: Expr, value: Expr } + | { type: "call", subject: Expr, args: Expr[] } + | { type: "unary", unaryType: UnaryType, subject: Expr } + | { type: "binary", binaryType: BinaryType, left: Expr, right: Expr } + | { type: "if", cond: Expr, truthy: Expr, falsy?: Expr } + | { type: "bool", value: boolean} + | {
type: "null"} + | { type: "loop", body: Expr } + | { type: "block", stmts: Stmt[], expr?: Expr } + ; \ No newline at end of file diff --git a/src/main.ts b/src/main.ts index 31de7e5..806c992 100644 --- a/src/main.ts +++ b/src/main.ts @@ -1,16 +1,13 @@ import { Lexer } from "./Lexer.ts"; +import { readFileSync } from 'node:fs'; -const text = ` - a1 123 + - // comment - "hello" - "escaped\\"\\nnewline" -`; + +const text = readFileSync("example.slg").toString() const lexer = new Lexer(text); let token = lexer.next(); while (token !== null) { const value = token.identValue ?? token.intValue ?? token.stringValue ?? ""; - console.log(`Lexed ${token}(${value})`); + console.log(`${token.type}\t${value}`) token = lexer.next(); }