diff --git a/compiler/ast/ast.ts b/compiler/ast/ast.ts
index e384660..5bc6367 100644
--- a/compiler/ast/ast.ts
+++ b/compiler/ast/ast.ts
@@ -135,7 +135,7 @@ export type DerefExpr = { expr: Expr };
export type ElemExpr = { expr: Expr; elem: number };
export type FieldExpr = { expr: Expr; ident: Ident };
export type IndexExpr = { expr: Expr; index: Expr };
-export type CallExpr = { expr: Expr; args: Expr };
+export type CallExpr = { expr: Expr; args: Expr[] };
export type UnaryExpr = { unaryType: UnaryType; expr: Expr };
export type BinaryExpr = { unaryType: UnaryType; left: Expr; right: Expr };
export type IfExpr = { cond: Expr; truthy: Block; falsy?: Block };
diff --git a/compiler/ast/visitor.ts b/compiler/ast/visitor.ts
index d6ad0d0..ba63080 100644
--- a/compiler/ast/visitor.ts
+++ b/compiler/ast/visitor.ts
@@ -1,4 +1,5 @@
import { exhausted } from "../util.ts";
+import { Block } from "./ast.ts";
import {
AnonStructTy,
ArrayExpr,
@@ -99,6 +100,7 @@ export interface Visitor<
visitCallExpr?(expr: Expr, kind: CallExpr, ...p: P): R;
visitUnaryExpr?(expr: Expr, kind: UnaryExpr, ...p: P): R;
visitBinaryExpr?(expr: Expr, kind: BinaryExpr, ...p: P): R;
+ visitBlockExpr?(expr: Expr, kind: Block, ...p: P): R;
visitIfExpr?(expr: Expr, kind: IfExpr, ...p: P): R;
visitLoopExpr?(expr: Expr, kind: LoopExpr, ...p: P): R;
visitWhileExpr?(expr: Expr, kind: WhileExpr, ...p: P): R;
@@ -119,6 +121,7 @@ export interface Visitor<
visitTupleTy?(ty: Ty, kind: TupleTy, ...p: P): R;
visitAnonStructTy?(ty: Ty, kind: AnonStructTy, ...p: P): R;
+ visitBlock?(block: Block, ...p: P): R;
visitPath?(path: Path, ...p: P): R;
visitIdent?(ident: Ident, ...p: P): R;
}
@@ -232,8 +235,123 @@ export function visitExpr<
case "error":
if (v.visitErrorExpr?.(expr, ...p) === "stop") return;
return;
- case "ident":
- if (v.visitIdentExpr?.(expr, kind, ...p) === "stop") return;
+ case "path":
+ if (v.visitPathExpr?.(expr, kind, ...p) === "stop") return;
+ visitPath(v, kind, ...p);
+ return;
+ case "null":
+ if (v.visitNullExpr?.(expr, ...p) === "stop") return;
+ return;
+ case "int":
+ if (v.visitIntExpr?.(expr, kind, ...p) === "stop") return;
+ return;
+ case "string":
+ if (v.visitStringExpr?.(expr, kind, ...p) === "stop") return;
+ return;
+ case "bool":
+ if (v.visitBoolExpr?.(expr, kind, ...p) === "stop") return;
+ return;
+ case "group":
+ if (v.visitGroupExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ return;
+ case "array":
+ if (v.visitArrayExpr?.(expr, kind, ...p) === "stop") return;
+ for (const expr of kind.exprs) {
+ visitExpr(v, expr, ...p);
+ }
+ return;
+ case "repeat":
+ if (v.visitRepeatExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ visitExpr(v, kind.length, ...p);
+ return;
+ case "struct":
+ if (v.visitStructExpr?.(expr, kind, ...p) === "stop") return;
+ if (kind.path) {
+ visitPath(v, kind.path, ...p);
+ }
+ for (const field of kind.field) {
+ visitIdent(v, field.ident, ...p);
+ visitExpr(v, field.expr, ...p);
+ }
+ return;
+ case "ref":
+ if (v.visitRefExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ return;
+ case "deref":
+ if (v.visitDerefExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ return;
+ case "elem":
+ if (v.visitElemExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ return;
+        case "field":
+            if (v.visitFieldExpr?.(expr, kind, ...p) === "stop") return;
+            // Recurse through the walker functions, as every other case does,
+            // so the children of the sub-expression are visited as well.
+            visitExpr(v, kind.expr, ...p);
+            visitIdent(v, kind.ident, ...p);
+            return;
+ case "index":
+ if (v.visitIndexExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ visitExpr(v, kind.index, ...p);
+ return;
+ case "call":
+ if (v.visitCallExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ for (const expr of kind.args) {
+ visitExpr(v, expr, ...p);
+ }
+ return;
+ case "unary":
+ if (v.visitUnaryExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.expr, ...p);
+ return;
+ case "binary":
+ if (v.visitBinaryExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.left, ...p);
+ visitExpr(v, kind.right, ...p);
+ return;
+ case "block":
+ if (v.visitBlockExpr?.(expr, kind, ...p) === "stop") return;
+ visitBlock(v, kind, ...p);
+ return;
+ case "if":
+ if (v.visitIfExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.cond, ...p);
+ visitBlock(v, kind.truthy, ...p);
+ if (kind.falsy) {
+ visitBlock(v, kind.falsy, ...p);
+ }
+ return;
+ case "loop":
+ if (v.visitLoopExpr?.(expr, kind, ...p) === "stop") return;
+ visitBlock(v, kind.body, ...p);
+ return;
+ case "while":
+ if (v.visitWhileExpr?.(expr, kind, ...p) === "stop") return;
+ visitExpr(v, kind.cond, ...p);
+ visitBlock(v, kind.body, ...p);
+ return;
+ case "for":
+ if (v.visitForExpr?.(expr, kind, ...p) === "stop") return;
+ visitPat(v, kind.pat, ...p);
+ visitExpr(v, kind.expr, ...p);
+ visitBlock(v, kind.body, ...p);
+ return;
+ case "c_for":
+ if (v.visitCForExpr?.(expr, kind, ...p) === "stop") return;
+ if (kind.decl) {
+ visitStmt(v, kind.decl, ...p);
+ }
+ if (kind.cond) {
+ visitExpr(v, kind.cond, ...p);
+ }
+ if (kind.incr) {
+ visitStmt(v, kind.incr, ...p);
+ }
return;
}
exhausted(kind);
@@ -270,13 +388,64 @@ export function visitTy<
case "error":
if (v.visitErrorTy?.(ty, ...p) === "stop") return;
return;
- case "ident":
- if (v.visitIdentTy?.(ty, kind, ...p) === "stop") return;
+        // Each case first offers the node to its hook; a result of "stop"
+        // skips the children. Children are walked with the free walker
+        // functions (not the hooks) so that nested nodes are descended into.
+        case "path":
+            if (v.visitPathTy?.(ty, kind, ...p) === "stop") return;
+            visitPath(v, kind, ...p);
+            return;
+        case "ref":
+            if (v.visitRefTy?.(ty, kind, ...p) === "stop") return;
+            visitTy(v, kind.ty, ...p);
+            return;
+        case "ptr":
+            if (v.visitPtrTy?.(ty, kind, ...p) === "stop") return;
+            visitTy(v, kind.ty, ...p);
+            return;
+        case "slice":
+            if (v.visitSliceTy?.(ty, kind, ...p) === "stop") return;
+            visitTy(v, kind.ty, ...p);
+            return;
+        case "array":
+            if (v.visitArrayTy?.(ty, kind, ...p) === "stop") return;
+            visitTy(v, kind.ty, ...p);
+            visitExpr(v, kind.length, ...p);
+            return;
+        case "anon_struct":
+            if (v.visitAnonStructTy?.(ty, kind, ...p) === "stop") return;
+            for (const field of kind.fields) {
+                visitIdent(v, field.ident, ...p);
+                visitTy(v, field.ty, ...p);
+            }
return;
}
exhausted(kind);
}
+/**
+ * Walks a block: offers it to the optional `visitBlock` hook, then visits
+ * each statement in order and finally the trailing expression, if present.
+ *
+ * As in the other walkers of this file that have children, a hook result
+ * of "stop" skips the children.
+ */
+export function visitBlock<
+    P extends PM = [],
+>(
+    v: Visitor<P>,
+    block: Block,
+    ...p: P
+) {
+    if (v.visitBlock?.(block, ...p) === "stop") return;
+    for (const stmt of block.stmts) {
+        visitStmt(v, stmt, ...p);
+    }
+    if (block.expr) {
+        visitExpr(v, block.expr, ...p);
+    }
+}
+
+/** Offers a path to the optional `visitPath` hook; nothing below the path is walked here. */
+export function visitPath<
+    P extends PM = [],
+>(
+    v: Visitor<P>,
+    path: Path,
+    ...p: P
+) {
+    v.visitPath?.(path, ...p);
+}
+
export function visitIdent<
P extends PM = [],
>(
diff --git a/compiler/main.ts b/compiler/main.ts
index 6cbe952..46b48a8 100644
--- a/compiler/main.ts
+++ b/compiler/main.ts
@@ -63,7 +63,7 @@ export class FileTreeAstCollector implements ast.Visitor<[_P]> {
this.superFile,
text,
);
- const fileAst = new Parser(file, text).parse();
+ const fileAst = new Parser(this.ctx, file).parse();
this.ctx.addFileAst(file, fileAst);
ast.visitFile(this, fileAst, { file });
await this.subFilePromise;
diff --git a/compiler/parser/lexer.ts b/compiler/parser/lexer.ts
new file mode 100644
index 0000000..7b6325c
--- /dev/null
+++ b/compiler/parser/lexer.ts
@@ -0,0 +1,338 @@
+import { Ctx, File } from "../ctx.ts";
+import { Pos, Span } from "../diagnostics.ts";
+import { ControlFlow, range } from "../util.ts";
+import { Token, TokenIter } from "./token.ts";
+
+/**
+ * Hand-written lexer producing the tokens of one source file.
+ *
+ * Whitespace and comments are returned as "whitespace" and "comment" tokens
+ * rather than being dropped. Lexical errors are reported through `ctx.report`.
+ */
+export class Lexer implements TokenIter {
+    private idx = 0;
+    private line = 1;
+    private col = 1;
+
+    private text: string;
+    /** Sticky regex matching any one static token; built once per lexer. */
+    private staticTokenRegex: RegExp;
+
+    public constructor(
+        private ctx: Ctx,
+        private file: File,
+    ) {
+        this.text = ctx.fileInfo(file).text;
+        // `staticTokenRes` is ordered longest-first, so the alternation
+        // prefers e.g. "::<" over "::" over ":".
+        const alternatives = staticTokenRes
+            .map((tok) => tok.length > 1 ? `(?:${tok})` : tok)
+            .join("|");
+        this.staticTokenRegex = new RegExp(`(${alternatives})`, "y");
+    }
+
+    /**
+     * Returns the next token, or `null` once the input is exhausted.
+     * Illegal characters are reported and skipped.
+     */
+    next(): Token | null {
+        // Loop instead of recursing so that a long run of illegal
+        // characters cannot overflow the call stack.
+        while (!this.done()) {
+            const token = this.lexToken();
+            if (token !== null) {
+                return token;
+            }
+            this.report(`illegal character '${this.current()}'`, this.pos());
+            this.step();
+        }
+        return null;
+    }
+
+    /** Lexes one token at the cursor; returns `null` if no rule matches. */
+    private lexToken(): Token | null {
+        const whitespace = this.lexWithTail(
+            (span) => this.token("whitespace", span),
+            /[ \t\r\n]/,
+        );
+        if (whitespace.break) {
+            return whitespace.val;
+        }
+
+        // Identifier-shaped words; reserved words become tokens whose type
+        // is the word itself.
+        const word = this.lexWithTail(
+            (span, val) =>
+                keywords.has(val)
+                    ? this.token(val, span)
+                    : this.token("ident", span, {
+                        type: "ident",
+                        identValue: val,
+                    }),
+            /[a-zA-Z_]/,
+            /[a-zA-Z0-9_]/,
+        );
+        if (word.break) {
+            return word.val;
+        }
+
+        // Non-zero decimal integers; a leading "0" is handled by lexZero.
+        const int = this.lexWithTail(
+            (span, val) =>
+                this.token("int", span, {
+                    type: "int",
+                    intValue: parseInt(val, 10),
+                }),
+            /[1-9]/,
+            /[0-9]/,
+        );
+        if (int.break) {
+            return int.val;
+        }
+
+        const begin = this.pos();
+        if (this.test("0")) return this.lexZero(begin);
+        if (this.test("'")) return this.lexCharLiteral(begin);
+        if (this.test('"')) return this.lexStringLiteral(begin);
+        if (this.test("/")) return this.lexSlash(begin);
+        return this.lexStaticToken(begin);
+    }
+
+    /** Lexes a lone "0"; a "0" directly followed by a digit is an error. */
+    private lexZero(begin: Pos): Token {
+        this.step();
+        if (this.test(/[0-9]/)) {
+            this.report("invalid number", begin);
+            return this.token("error", { begin, end: begin });
+        }
+        return this.token("int", { begin, end: begin }, {
+            type: "int",
+            intValue: 0,
+        });
+    }
+
+    /** Lexes a character literal into an "int" token holding its char code. */
+    private lexCharLiteral(begin: Pos): Token {
+        this.step(); // opening quote
+        let value: string;
+        if (this.test("\\")) {
+            this.step();
+            if (this.done()) {
+                this.report("malformed character literal", begin);
+                return this.token("error", { begin, end: begin });
+            }
+            value = this.unescapeChar(this.current());
+        } else {
+            value = this.current();
+        }
+        this.step();
+        // `test` is false at end of input, so this also covers truncation.
+        if (!this.test("'") || value.length === 0) {
+            this.report("malformed character literal", begin);
+            return this.token("error", { begin, end: begin });
+        }
+        const end = this.pos(); // the span ends on the closing quote
+        this.step();
+        return this.token("int", { begin, end }, {
+            type: "int",
+            intValue: value.charCodeAt(0),
+        });
+    }
+
+    /** Lexes a double-quoted string literal, resolving backslash escapes. */
+    private lexStringLiteral(begin: Pos): Token {
+        this.step(); // opening quote
+        let value = "";
+        while (!this.done() && !this.test('"')) {
+            if (this.test("\\")) {
+                this.step();
+                if (this.done()) {
+                    break;
+                }
+                value += this.unescapeChar(this.current());
+            } else {
+                value += this.current();
+            }
+            this.step();
+        }
+        if (!this.test('"')) {
+            this.report("unclosed/malformed string", begin);
+            return this.token("error", { begin, end: begin });
+        }
+        const end = this.pos(); // the span ends on the closing quote
+        this.step();
+        return this.token("string", { begin, end }, {
+            type: "string",
+            stringValue: value,
+        });
+    }
+
+    /** Lexes a line comment, a (nestable) block comment, or a plain "/". */
+    private lexSlash(begin: Pos): Token {
+        let end = begin;
+        this.step();
+
+        if (this.test("/")) {
+            // The comment runs up to, but not including, the newline.
+            while (!this.done() && !this.test("\n")) {
+                end = this.pos();
+                this.step();
+            }
+            return this.token("comment", { begin, end });
+        }
+
+        if (this.test("*")) {
+            end = this.pos();
+            this.step();
+            let depth = 1;
+            // `last` is cleared after each delimiter so that its characters
+            // are not reused for the next one.
+            let last: string | undefined = undefined;
+            while (!this.done() && depth > 0) {
+                if (last === "*" && this.current() === "/") {
+                    depth -= 1;
+                    last = undefined;
+                } else if (last === "/" && this.current() === "*") {
+                    depth += 1;
+                    last = undefined;
+                } else {
+                    last = this.current();
+                }
+                end = this.pos();
+                this.step();
+            }
+            if (depth !== 0) {
+                this.report("unclosed/malformed multiline comment", begin);
+            }
+            // Closed or not, the consumed text is a comment, never a "/".
+            return this.token("comment", { begin, end });
+        }
+
+        return this.token("/", { begin, end });
+    }
+
+    /** Matches the longest static token at the cursor, or returns `null`. */
+    private lexStaticToken(begin: Pos): Token | null {
+        // A sticky regex matches exactly at `lastIndex`, which avoids
+        // copying the rest of the text for every token.
+        this.staticTokenRegex.lastIndex = this.idx;
+        const match = this.staticTokenRegex.exec(this.text);
+        if (!match) {
+            return null;
+        }
+        const text = match[0];
+        let end = begin;
+        for (const _ of range(text.length)) {
+            end = this.pos();
+            this.step();
+        }
+        return this.token(text, { begin, end });
+    }
+
+    /** Maps the character after a backslash to the character it denotes. */
+    private unescapeChar(c: string): string {
+        const escapes: Record<string, string> = {
+            n: "\n",
+            t: "\t",
+            "0": "\0",
+        };
+        return escapes[c] ?? c;
+    }
+
+    /**
+     * Lexes one character matching `startPat` followed by any run of
+     * characters matching `tailPat` (which defaults to `startPat`).
+     *
+     * Returns `Break` carrying `builder(span, text)` on a match, and
+     * `Continue` without consuming anything otherwise.
+     */
+    private lexWithTail<R>(
+        builder: (span: Span, val: string) => R,
+        startPat: RegExp,
+        tailPat = startPat,
+    ): ControlFlow<R> {
+        const begin = this.pos();
+        if (!this.test(startPat)) {
+            return ControlFlow.Continue(undefined);
+        }
+        let end = begin;
+        let val = this.current();
+        this.step();
+        while (this.test(tailPat)) {
+            // The span end is the position of the last consumed character.
+            end = this.pos();
+            val += this.current();
+            this.step();
+        }
+        return ControlFlow.Break(builder({ begin, end }, val));
+    }
+
+    /** True once the cursor is past the last character. */
+    private done(): boolean {
+        return this.idx >= this.text.length;
+    }
+
+    /** The character under the cursor. */
+    private current(): string {
+        return this.text[this.idx];
+    }
+
+    /** Advances one character, keeping line and column in sync; no-op at end of input. */
+    private step() {
+        if (this.done()) {
+            return;
+        }
+        if (this.current() === "\n") {
+            this.line += 1;
+            this.col = 1;
+        } else {
+            this.col += 1;
+        }
+        this.idx += 1;
+    }
+
+    /** Snapshot of the cursor position. */
+    private pos(): Pos {
+        return {
+            idx: this.idx,
+            line: this.line,
+            col: this.col,
+        };
+    }
+
+    /** Builds a token; `length` counts the characters from `span.begin` through `span.end`. */
+    private token(type: string, span: Span, token?: Partial<Token>): Token {
+        const length = span.end.idx - span.begin.idx + 1;
+        return { type, span, length, ...token };
+    }
+
+    /**
+     * Tests the input at the cursor; always false at end of input.
+     * A string must equal the current character. A regex whose source
+     * starts with "^" is tested against the rest of the text, any other
+     * regex against the current character only.
+     */
+    private test(pattern: RegExp | string): boolean {
+        if (this.done()) {
+            return false;
+        }
+        if (typeof pattern === "string") {
+            return this.current() === pattern;
+        } else if (pattern.source.startsWith("^")) {
+            return pattern.test(this.text.slice(this.idx));
+        } else {
+            return pattern.test(this.current());
+        }
+    }
+
+    /** Reports an error for this file at `pos`. */
+    private report(msg: string, pos: Pos) {
+        this.ctx.report({
+            severity: "error",
+            origin: "parser",
+            file: this.file,
+            msg,
+            pos,
+        });
+    }
+}
+
+/**
+ * Reserved words. The lexer emits an identifier-shaped word found in this
+ * set as a token whose type is the word itself instead of an "ident" token.
+ */
+const keywords = new Set([
+    "false", "true", "null",
+    "int", "bool", "string",
+    "return", "break", "continue",
+    "let", "mut", "fn",
+    "loop", "if", "else",
+    "struct", "enum",
+    "or", "and", "not",
+    "while", "for", "in",
+    "mod", "pub", "use",
+    "type_alias",
+]);
+
+/**
+ * Punctuation and operator tokens with a fixed spelling.
+ *
+ * "0" is intentionally not listed: the lexer handles it before consulting
+ * this table, and an escaped "0" would turn into the regex escape `\0`,
+ * which matches a NUL character rather than the digit.
+ */
+const staticTokens = [
+    "=",
+    "==",
+    "<",
+    "<=",
+    ">",
+    ">=",
+    "-",
+    "->",
+    "!",
+    "!=",
+    "+",
+    "+=",
+    "-=",
+    ":",
+    "::",
+    "::<",
+    "(",
+    ")",
+    "{",
+    "}",
+    "[",
+    "]",
+    ".",
+    ",",
+    ";",
+    "#",
+    "&",
+] as const;
+
+/**
+ * Regex source for each static token, longest first so that an alternation
+ * built from this list prefers the longest spelling. Only regex
+ * metacharacters are escaped, so alphanumeric characters keep their
+ * literal meaning.
+ */
+const staticTokenRes = staticTokens
+    .toSorted((a, b) => b.length - a.length)
+    .map((tok) => tok.replace(/[.*+?^${}()|[\]\\\/-]/g, "\\$&"));
diff --git a/compiler/parser/parser.ts b/compiler/parser/parser.ts
index 15186af..aa66507 100644
--- a/compiler/parser/parser.ts
+++ b/compiler/parser/parser.ts
@@ -1,14 +1,98 @@
-import { File } from "../ast/ast.ts";
-import { File as CtxFile } from "../ctx.ts";
+import {
+ Expr,
+ ExprKind,
+ File,
+ Ident,
+ Item,
+ ItemKind,
+ Pat,
+ PatKind,
+ Stmt,
+ StmtKind,
+ Ty,
+ TyKind,
+} from "../ast/ast.ts";
+import { Ctx, File as CtxFile } from "../ctx.ts";
+import { Span } from "../diagnostics.ts";
import { todo } from "../util.ts";
+import { Lexer } from "./lexer.ts";
+import { Token } from "./token.ts";
+/**
+ * Parser for one source file; pulls its tokens from a `Lexer`.
+ *
+ * NOTE(review): work in progress. `parseStmt` is not defined in the visible
+ * part of this class, and `parse` returns the `Stmt[]` from `parseStmts`
+ * where `File` is declared; confirm both.
+ * NOTE(review): tokens come straight from `Lexer`, which also emits
+ * "whitespace" and "comment" tokens; presumably it should be wrapped in
+ * `SigFilter`; confirm.
+ */
 export class Parser {
+    private lexer: Lexer;
+    private currentToken: Token | null;
+    /** End of the most recently consumed token; used as the position at end of input. */
+    private lastPos: Span["end"] = { idx: 0, line: 1, col: 1 };
+
     public constructor(
+        private ctx: Ctx,
         private file: CtxFile,
-        private text: string,
-    ) {}
+    ) {
+        this.lexer = new Lexer(this.ctx, this.file);
+        this.currentToken = this.lexer.next();
+    }
     public parse(): File {
-        return todo();
+        return this.parseStmts();
+    }
+
+    /** Parses statements until the token stream is exhausted. */
+    private parseStmts(): Stmt[] {
+        const stmts: Stmt[] = [];
+        while (!this.done()) {
+            stmts.push(this.parseStmt());
+        }
+        return stmts;
+    }
+
+    /** Advances to the next token, remembering where the previous one ended. */
+    private step() {
+        if (this.currentToken !== null) {
+            this.lastPos = this.currentToken.span.end;
+        }
+        this.currentToken = this.lexer.next();
+    }
+    /** True once the lexer has no more tokens. */
+    private done(): boolean {
+        return this.currentToken == null;
+    }
+    /** The current token; only valid while `done()` is false. */
+    private current(): Token {
+        return this.currentToken!;
+    }
+    /**
+     * Position used for diagnostics: the start of the current token, or
+     * the end of the last consumed token once the input is exhausted.
+     */
+    private pos(): Span["begin"] {
+        if (this.done()) {
+            return this.lastPos;
+        }
+        return this.current().span.begin;
+    }
+
+    /** True if there is a current token and it has the given type. */
+    private test(type: string): boolean {
+        return !this.done() && this.current().type === type;
+    }
+
+    /** Reports a parse error through `ctx.report`, in the same shape the lexer uses. */
+    private report(msg: string, pos = this.pos()) {
+        this.ctx.report({
+            severity: "error",
+            origin: "parser",
+            file: this.file,
+            msg,
+            pos,
+        });
+    }
+
+    // AST node constructors: each pairs a node kind with its source span.
+
+    private stmt(kind: StmtKind, span: Span): Stmt {
+        return { kind, span };
+    }
+
+    private item(
+        kind: ItemKind,
+        span: Span,
+        ident: Ident,
+        pub: boolean,
+    ): Item {
+        return { kind, span, ident, pub };
+    }
+
+    private expr(kind: ExprKind, span: Span): Expr {
+        return { kind, span };
+    }
+
+    private pat(kind: PatKind, span: Span): Pat {
+        return { kind, span };
+    }
+
+    private ty(kind: TyKind, span: Span): Ty {
+        return { kind, span };
     }
 }
diff --git a/compiler/parser/token.ts b/compiler/parser/token.ts
new file mode 100644
index 0000000..ecfdb6a
--- /dev/null
+++ b/compiler/parser/token.ts
@@ -0,0 +1,32 @@
+import { Span } from "../diagnostics.ts";
+
+/**
+ * Payload of a token, tagged by `type`. Identifier, integer and string
+ * tokens carry a value; every other token is described by its type alone.
+ */
+export type TokenData =
+    | { type: "ident"; identValue: string }
+    | { type: "int"; intValue: number }
+    | { type: "string"; stringValue: string }
+    | { type: string };
+
+/** A token: its payload together with its source span and its length. */
+export type Token = TokenData & { span: Span; length: number };
+
+/** A pull-based source of tokens; `next` yields `null` when there are no more. */
+export interface TokenIter {
+    next(): Token | null;
+}
+
+/**
+ * Token iterator adapter that passes through only significant tokens,
+ * dropping those of type "whitespace" and "comment".
+ */
+export class SigFilter implements TokenIter {
+    public constructor(private iter: TokenIter) {}
+
+    /** Returns the next significant token, or `null` when the inner iterator is exhausted. */
+    next(): Token | null {
+        // Loop rather than recurse, so a long run of whitespace and comment
+        // tokens cannot exhaust the call stack.
+        for (;;) {
+            const token = this.iter.next();
+            if (token == null) {
+                return token;
+            }
+            if (token.type !== "whitespace" && token.type !== "comment") {
+                return token;
+            }
+        }
+    }
+}
+
diff --git a/compiler/util.ts b/compiler/util.ts
index 7b8b9bf..fba49e0 100644
--- a/compiler/util.ts
+++ b/compiler/util.ts
@@ -7,3 +7,25 @@ export function exhausted(_: never) {
class Unexhausted extends Error {}
throw new Unexhausted();
}
+
+/** Result of an operation: `Ok` carrying a value `V`, or `Err` carrying an error `E`. */
+export type Res<V, E> = Ok<V> | Err<E>;
+export type Ok<V> = { ok: true; val: V };
+export type Err<E> = { ok: false; val: E };
+
+/** Constructors for the two `Res` variants. */
+export const Ok = <V>(val: V): Ok<V> => ({ ok: true, val });
+export const Err = <E>(val: E): Err<E> => ({ ok: false, val });
+
+/**
+ * Outcome of a step that may finish early: `Break` carries the final
+ * result `R`, `Continue` carries a value `V` and means "keep going".
+ * The `break` flag discriminates the two variants.
+ */
+export type ControlFlow<
+    R = undefined,
+    V = undefined,
+> = Break<R> | Continue<V>;
+
+export type Break<R> = { break: true; val: R };
+export type Continue<V> = { break: false; val: V };
+
+/** Constructors for the two `ControlFlow` variants. */
+export const ControlFlow = {
+    Break: <R>(val: R): Break<R> => ({ break: true, val }),
+    Continue: <V>(val: V): Continue<V> => ({ break: false, val }),
+} as const;
+
+/** Returns an array of `length` zeros, e.g. for `for (const _ of range(n))` to repeat n times. */
+export const range = (length: number) => {
+    const zeros = new Array(length);
+    zeros.fill(0);
+    return zeros;
+};