diff --git a/compiler/ast/ast.ts b/compiler/ast/ast.ts index e384660..5bc6367 100644 --- a/compiler/ast/ast.ts +++ b/compiler/ast/ast.ts @@ -135,7 +135,7 @@ export type DerefExpr = { expr: Expr }; export type ElemExpr = { expr: Expr; elem: number }; export type FieldExpr = { expr: Expr; ident: Ident }; export type IndexExpr = { expr: Expr; index: Expr }; -export type CallExpr = { expr: Expr; args: Expr }; +export type CallExpr = { expr: Expr; args: Expr[] }; export type UnaryExpr = { unaryType: UnaryType; expr: Expr }; export type BinaryExpr = { unaryType: UnaryType; left: Expr; right: Expr }; export type IfExpr = { cond: Expr; truthy: Block; falsy?: Block }; diff --git a/compiler/ast/visitor.ts b/compiler/ast/visitor.ts index d6ad0d0..ba63080 100644 --- a/compiler/ast/visitor.ts +++ b/compiler/ast/visitor.ts @@ -1,4 +1,5 @@ import { exhausted } from "../util.ts"; +import { Block } from "./ast.ts"; import { AnonStructTy, ArrayExpr, @@ -99,6 +100,7 @@ export interface Visitor< visitCallExpr?(expr: Expr, kind: CallExpr, ...p: P): R; visitUnaryExpr?(expr: Expr, kind: UnaryExpr, ...p: P): R; visitBinaryExpr?(expr: Expr, kind: BinaryExpr, ...p: P): R; + visitBlockExpr?(expr: Expr, kind: Block, ...p: P): R; visitIfExpr?(expr: Expr, kind: IfExpr, ...p: P): R; visitLoopExpr?(expr: Expr, kind: LoopExpr, ...p: P): R; visitWhileExpr?(expr: Expr, kind: WhileExpr, ...p: P): R; @@ -119,6 +121,7 @@ export interface Visitor< visitTupleTy?(ty: Ty, kind: TupleTy, ...p: P): R; visitAnonStructTy?(ty: Ty, kind: AnonStructTy, ...p: P): R; + visitBlock?(block: Block, ...p: P): R; visitPath?(path: Path, ...p: P): R; visitIdent?(ident: Ident, ...p: P): R; } @@ -232,8 +235,123 @@ export function visitExpr< case "error": if (v.visitErrorExpr?.(expr, ...p) === "stop") return; return; - case "ident": - if (v.visitIdentExpr?.(expr, kind, ...p) === "stop") return; + case "path": + if (v.visitPathExpr?.(expr, kind, ...p) === "stop") return; + visitPath(v, kind, ...p); + return; + case "null": + if (v.visitNullExpr?.(expr, ...p) === "stop") return; + return; + case "int": + if (v.visitIntExpr?.(expr, kind, ...p) === "stop") return; + return; + case "string": + if (v.visitStringExpr?.(expr, kind, ...p) === "stop") return; + return; + case "bool": + if (v.visitBoolExpr?.(expr, kind, ...p) === "stop") return; + return; + case "group": + if (v.visitGroupExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + return; + case "array": + if (v.visitArrayExpr?.(expr, kind, ...p) === "stop") return; + for (const expr of kind.exprs) { + visitExpr(v, expr, ...p); + } + return; + case "repeat": + if (v.visitRepeatExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + visitExpr(v, kind.length, ...p); + return; + case "struct": + if (v.visitStructExpr?.(expr, kind, ...p) === "stop") return; + if (kind.path) { + visitPath(v, kind.path, ...p); + } + for (const field of kind.field) { + visitIdent(v, field.ident, ...p); + visitExpr(v, field.expr, ...p); + } + return; + case "ref": + if (v.visitRefExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + return; + case "deref": + if (v.visitDerefExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + return; + case "elem": + if (v.visitElemExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + return; + case "field": + if (v.visitFieldExpr?.(expr, kind, ...p) === "stop") return; + v.visitExpr?.(kind.expr, ...p); + v.visitIdent?.(kind.ident, ...p); + return; + case "index": + if (v.visitIndexExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + visitExpr(v, kind.index, ...p); + return; + case "call": + if (v.visitCallExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + for (const expr of kind.args) { + visitExpr(v, expr, ...p); + } + return; + case "unary": + if (v.visitUnaryExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.expr, ...p); + return; + case "binary": + if (v.visitBinaryExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.left, ...p); + visitExpr(v, kind.right, ...p); + return; + case "block": + if (v.visitBlockExpr?.(expr, kind, ...p) === "stop") return; + visitBlock(v, kind, ...p); + return; + case "if": + if (v.visitIfExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.cond, ...p); + visitBlock(v, kind.truthy, ...p); + if (kind.falsy) { + visitBlock(v, kind.falsy, ...p); + } + return; + case "loop": + if (v.visitLoopExpr?.(expr, kind, ...p) === "stop") return; + visitBlock(v, kind.body, ...p); + return; + case "while": + if (v.visitWhileExpr?.(expr, kind, ...p) === "stop") return; + visitExpr(v, kind.cond, ...p); + visitBlock(v, kind.body, ...p); + return; + case "for": + if (v.visitForExpr?.(expr, kind, ...p) === "stop") return; + visitPat(v, kind.pat, ...p); + visitExpr(v, kind.expr, ...p); + visitBlock(v, kind.body, ...p); + return; + case "c_for": + if (v.visitCForExpr?.(expr, kind, ...p) === "stop") return; + if (kind.decl) { + visitStmt(v, kind.decl, ...p); + } + if (kind.cond) { + visitExpr(v, kind.cond, ...p); + } + if (kind.incr) { + visitStmt(v, kind.incr, ...p); + } return; } exhausted(kind); @@ -270,13 +388,64 @@ export function visitTy< case "error": if (v.visitErrorTy?.(ty, ...p) === "stop") return; return; - case "ident": - if (v.visitIdentTy?.(ty, kind, ...p) === "stop") return; + case "path": + if (v.visitPathTy?.(ty, kind, ...p) === "stop") return; + v.visitPath?.(kind, ...p); + return; + case "ref": + if (v.visitRefTy?.(ty, kind, ...p) === "stop") return; + v.visitTy?.(kind.ty, ...p); + return; + case "ptr": + if (v.visitPtrTy?.(ty, kind, ...p) === "stop") return; + v.visitTy?.(kind.ty, ...p); + return; + case "slice": + if (v.visitSliceTy?.(ty, kind, ...p) === "stop") return; + v.visitTy?.(kind.ty, ...p); + return; + case "array": + if (v.visitArrayTy?.(ty, kind, ...p) === "stop") return; + v.visitTy?.(kind.ty, ...p); + v.visitExpr?.(kind.length, ...p); + return; + case "anon_struct": + if (v.visitAnonStructTy?.(ty, kind, ...p) === "stop") return; + for (const field of kind.fields) { + v.visitIdent?.(field.ident, ...p); + v.visitTy?.(field.ty, ...p); + } return; } exhausted(kind); } +export function visitBlock< + P extends PM = [], +>( + v: Visitor

, + block: Block, + ...p: P +) { + v.visitBlock?.(block, ...p); + for (const stmt of block.stmts) { + visitStmt(v, stmt, ...p); + } + if (block.expr) { + visitExpr(v, block.expr, ...p); + } +} + +export function visitPath< + P extends PM = [], +>( + v: Visitor

, + path: Path, + ...p: P +) { + v.visitPath?.(path, ...p); +} + export function visitIdent< P extends PM = [], >( diff --git a/compiler/main.ts b/compiler/main.ts index 6cbe952..46b48a8 100644 --- a/compiler/main.ts +++ b/compiler/main.ts @@ -63,7 +63,7 @@ export class FileTreeAstCollector implements ast.Visitor<[_P]> { this.superFile, text, ); - const fileAst = new Parser(file, text).parse(); + const fileAst = new Parser(this.ctx, file).parse(); this.ctx.addFileAst(file, fileAst); ast.visitFile(this, fileAst, { file }); await this.subFilePromise; diff --git a/compiler/parser/lexer.ts b/compiler/parser/lexer.ts new file mode 100644 index 0000000..7b6325c --- /dev/null +++ b/compiler/parser/lexer.ts @@ -0,0 +1,338 @@ +import { Ctx, File } from "../ctx.ts"; +import { Pos, Span } from "../diagnostics.ts"; +import { ControlFlow, range } from "../util.ts"; +import { Token, TokenIter } from "./token.ts"; + +export class Lexer implements TokenIter { + private idx = 0; + private line = 1; + private col = 1; + + private text: string; + + public constructor( + private ctx: Ctx, + private file: File, + ) { + this.text = ctx.fileInfo(file).text; + } + + next(): Token | null { + if (this.done()) { + return null; + } + let cf: ControlFlow; + if ( + cf = this.lexWithTail( + (span) => this.token("whitespace", span), + /[ \t\r\n]/, + ), cf.break + ) { + return cf.val; + } + if ( + cf = this.lexWithTail( + (span, val) => { + return keywords.has(val) + ? this.token(val, span) + : this.token("ident", span, { + type: "ident", + identValue: val, + }); + }, + /[a-zA-Z_]/, + /[a-zA-Z0-9_]/, + ), cf.break + ) { + return cf.val; + } + if ( + cf = this.lexWithTail( + (span, val) => + this.token("int", span, { + type: "int", + intValue: parseInt(val), + }), + /[1-9]/, + /[0-9]/, + ), cf.break + ) { + return cf.val; + } + const begin = this.pos(); + let end = begin; + const pos = begin; + if (this.test("0")) { + this.step(); + if (!this.done() && this.test(/[0-9]/)) { + this.report("invalid number", pos); + return this.token("error", { begin, end }); + } + return this.token("int", { begin, end }, { + type: "int", + intValue: 0, + }); + } + + if (this.test("'")) { + this.step(); + let value: string; + if (this.test("\\")) { + this.step(); + if (this.done()) { + this.report("malformed character literal", pos); + return this.token("error", { begin, end }); + } + value = { + n: "\n", + t: "\t", + "0": "\0", + }[this.current()] ?? this.current(); + } else { + value = this.current(); + } + this.step(); + if (this.done() || !this.test("'") || value.length === 0) { + this.report("malformed character literal", pos); + return this.token("error", { begin, end }); + } + this.step(); + return this.token("int", { begin, end }, { + type: "int", + intValue: value.charCodeAt(0), + }); + } + + if (this.test('"')) { + this.step(); + let value = ""; + while (!this.done() && !this.test('"')) { + if (this.test("\\")) { + this.step(); + if (this.done()) { + break; + } + value += { + n: "\n", + t: "\t", + "0": "\0", + }[this.current()] ?? this.current(); + } else { + value += this.current(); + } + this.step(); + } + if (this.done() || !this.test('"')) { + this.report("unclosed/malformed string", pos); + return this.token("error", { begin, end }); + } + this.step(); + return this.token("string", { begin, end }, { + type: "string", + stringValue: value, + }); + } + + if (this.test("/")) { + this.step(); + + if (this.test("/")) { + while (!this.done() && !this.test("\n")) { + end = this.pos(); + this.step(); + } + return this.token("comment", { begin, end }); + } + + if (this.test("*")) { + end = this.pos(); + this.step(); + let depth = 1; + let last: string | undefined = undefined; + while (!this.done() && depth > 0) { + if (last === "*" && this.current() === "/") { + depth -= 1; + last = undefined; + } else if (last === "/" && this.current() === "*") { + depth += 1; + last = undefined; + } else { + last = this.current(); + } + end = this.pos(); + this.step(); + } + if (depth !== 0) { + this.report("unclosed/malformed multiline comment", pos); + return this.token("comment", { begin, end }); + } + } + + return this.token("/", { begin, end }); + } + + const match = this.text.slice(this.idx).match( + new RegExp(`^(${ + staticTokenRes + .map((tok) => tok.length > 1 ? `(?:${tok})` : tok) + .join("|") + })`), + ); + if (match) { + for (const _ of range(match[1].length)) { + end = this.pos(); + this.step(); + } + return this.token(match[1], { begin, end }); + } + + this.report(`illegal character '${this.current()}'`, pos); + this.step(); + return this.next(); + } + + private lexWithTail( + builder: (span: Span, val: string) => R, + startPat: RegExp, + tailPat = startPat, + ): ControlFlow { + const begin = this.pos(); + if (!this.test(startPat)) { + return ControlFlow.Continue(undefined); + } + let end = begin; + let val = this.current(); + this.step(); + while (this.test(tailPat)) { + end = begin; + val += this.current(); + this.step(); + } + return ControlFlow.Break(builder({ begin, end }, val)); + } + + private done(): boolean { + return this.idx >= this.text.length; + } + + private current(): string { + return this.text[this.idx]; + } + + private step() { + if (this.done()) { + return; + } + if (this.current() === "\n") { + this.line += 1; + this.col = 1; + } else { + this.col += 1; + } + this.idx += 1; + } + + private pos(): Pos { + return { + idx: this.idx, + line: this.line, + col: this.col, + }; + } + + private token(type: string, span: Span, token?: Partial): Token { + const length = span.end.idx - span.begin.idx + 1; + return { type, span, length, ...token }; + } + + private test(pattern: RegExp | string): boolean { + if (this.done()) { + return false; + } + if (typeof pattern === "string") { + return this.current() === pattern; + } else if (pattern.source.startsWith("^")) { + return pattern.test(this.text.slice(this.idx)); + } else { + return pattern.test(this.current()); + } + } + + private report(msg: string, pos: Pos) { + this.ctx.report({ + severity: "error", + origin: "parser", + file: this.file, + msg, + pos, + }); + } +} + +const keywords = new Set([ + "false", + "true", + "null", + "int", + "bool", + "string", + "return", + "break", + "continue", + "let", + "mut", + "fn", + "loop", + "if", + "else", + "struct", + "enum", + "or", + "and", + "not", + "while", + "for", + "in", + "mod", + "pub", + "use", + "type_alias", +]); + +const staticTokens = [ + "=", + "==", + "<", + "<=", + ">", + ">=", + "-", + "->", + "!", + "!=", + "+", + "+=", + "-=", + ":", + "::", + "::<", + "(", + ")", + "{", + "}", + "[", + "]", + "<", + ">", + ".", + ",", + ":", + ";", + "#", + "&", + "0", +] as const; + +const staticTokenRes = staticTokens + .toSorted((a, b) => b.length - a.length) + .map((tok) => tok.split("").map((c) => `\\${c}`).join("")); diff --git a/compiler/parser/parser.ts b/compiler/parser/parser.ts index 15186af..aa66507 100644 --- a/compiler/parser/parser.ts +++ b/compiler/parser/parser.ts @@ -1,14 +1,98 @@ -import { File } from "../ast/ast.ts"; -import { File as CtxFile } from "../ctx.ts"; +import { + Expr, + ExprKind, + File, + Ident, + Item, + ItemKind, + Pat, + PatKind, + Stmt, + StmtKind, + Ty, + TyKind, +} from "../ast/ast.ts"; +import { Ctx, File as CtxFile } from "../ctx.ts"; +import { Span } from "../diagnostics.ts"; import { todo } from "../util.ts"; +import { Lexer } from "./lexer.ts"; +import { Token } from "./token.ts"; export class Parser { + private lexer: Lexer; + private currentToken: Token | null; + public constructor( + private ctx: Ctx, private file: CtxFile, - private text: string, - ) {} + ) { + this.lexer = new Lexer(this.ctx, this.file); + this.currentToken = this.lexer.next(); + } public parse(): File { - return todo(); + return this.parseStmts(); + } + + private parseStmts(): Stmt[] { + const stmts: Stmt[] = []; + while (!this.done()) { + stmts.push(this.parseStmt()); + } + return stmts; + } + + private step() { + this.currentToken = this.lexer.next(); + } + private done(): boolean { + return this.currentToken == null; + } + private current(): Token { + return this.currentToken!; + } + private pos(): Pos { + if (this.done()) { + return this.lexer.currentPos(); + } + return this.current().pos; + } + + private test(type: string): boolean { + return !this.done() && this.current().type === type; + } + + private report(msg: string, pos = this.pos()) { + this.reporter.reportError({ + msg, + pos, + reporter: "Parser", + }); + printStackTrace(); + } + + private stmt(kind: StmtKind, span: Span): Stmt { + return { kind, span }; + } + + private item( + kind: ItemKind, + span: Span, + ident: Ident, + pub: boolean, + ): Item { + return { kind, span, ident, pub }; + } + + private expr(kind: ExprKind, span: Span): Expr { + return { kind, span }; + } + + private pat(kind: PatKind, span: Span): Pat { + return { kind, span }; + } + + private ty(kind: TyKind, span: Span): Ty { + return { kind, span }; } } diff --git a/compiler/parser/token.ts b/compiler/parser/token.ts new file mode 100644 index 0000000..ecfdb6a --- /dev/null +++ b/compiler/parser/token.ts @@ -0,0 +1,32 @@ +import { Span } from "../diagnostics.ts"; + +export type Token = TokenData & { + span: Span; + length: number; +}; + +export type TokenData = + | { type: "ident"; identValue: string } + | { type: "int"; intValue: number } + | { type: "string"; stringValue: string } + | { type: string }; + +export interface TokenIter { + next(): Token | null; +} + +export class SigFilter implements TokenIter { + public constructor(private iter: TokenIter) {} + + next(): Token | null { + const token = this.iter.next(); + if (token === null) { + return token; + } + if (token?.type === "whitespace" || token?.type === "comment") { + return this.next(); + } + return token; + } +} + diff --git a/compiler/util.ts b/compiler/util.ts index 7b8b9bf..fba49e0 100644 --- a/compiler/util.ts +++ b/compiler/util.ts @@ -7,3 +7,25 @@ export function exhausted(_: never) { class Unexhausted extends Error {} throw new Unexhausted(); } + +export type Res = Ok | Err; +export type Ok = { ok: true; val: V }; +export type Err = { ok: false; val: E }; + +export const Ok = (val: V): Ok => ({ ok: true, val }); +export const Err = (val: E): Err => ({ ok: false, val }); + +export type ControlFlow< + R = undefined, + V = undefined, +> = Break | Continue; + +export type Break = { break: true; val: R }; +export type Continue = { break: false; val: V }; + +export const ControlFlow = { + Break: (val: R): Break => ({ break: true, val }), + Continue: (val: V): Continue => ({ break: false, val }), +} as const; + +export const range = (length: number) => (new Array(length).fill(0));