import type { Reporter } from "./info.ts";
import type { Pos, Token } from "./token.ts";

/**
 * Hand-written lexer that turns source text into `Token`s one at a time.
 *
 * Call {@link Lexer.next} repeatedly until it returns `null`. Whitespace and
 * `//` line comments are skipped. Lexical errors are sent to the supplied
 * `Reporter` with `reporter: "Lexer"`; malformed numbers and strings
 * additionally yield a token of type `"error"`.
 */
export class Lexer {
    private static readonly whitespace = /[ \t\n\r]/;
    private static readonly identStart = /[a-zA-Z_]/;
    private static readonly identPart = /[a-zA-Z0-9_]/;
    private static readonly nonZeroDigit = /[1-9]/;
    private static readonly digit = /[0-9]/;
    // NOTE(review): this set must contain every single-character punctuation
    // token the parser expects. It covers + { } ; = - * ( ) . , : [ ] > < !
    // — confirm nothing else is required.
    private static readonly punctuation = /[+{};=\-*().,:\[\]><!]/;

    /** Words that are emitted with their own token type instead of `"ident"`. */
    private static readonly keywords: ReadonlySet<string> = new Set([
        "break",
        "return",
        "let",
        "fn",
        "loop",
        "if",
        "else",
        "struct",
        "import",
        "false",
        "true",
        "null",
        "or",
        "and",
        "not",
    ]);

    /** Punctuation pairs that form a single token. */
    private static readonly twoCharOperators: ReadonlySet<string> = new Set([
        "==",
        "<=",
        ">=",
        "->",
        "-=",
        "!=",
        "+=",
    ]);

    /** Characters with a special meaning after a backslash in a string. */
    private static readonly escapes: ReadonlyMap<string, string> = new Map([
        ["n", "\n"],
        ["t", "\t"],
        ["0", "\0"],
    ]);

    private index = 0;
    private line = 1;
    private col = 1;

    /**
     * @param text Source text to tokenize.
     * @param reporter Receives every lexical error via `reportError`.
     */
    public constructor(
        private readonly text: string,
        private readonly reporter: Reporter,
    ) {}

    /**
     * Produces the next token.
     *
     * @returns The next token, or `null` once the end of the text is reached.
     */
    public next(): Token | null {
        // Iterate rather than recurse: whitespace runs, comments and illegal
        // characters produce no token, and long sequences of them must not
        // grow the call stack.
        while (!this.done()) {
            const pos = this.pos();

            if (this.test(Lexer.whitespace)) {
                while (this.test(Lexer.whitespace)) {
                    this.step();
                }
                continue;
            }
            if (this.test(Lexer.identStart)) {
                return this.lexWord(pos);
            }
            if (this.test(Lexer.nonZeroDigit)) {
                return this.lexInt(pos);
            }
            if (this.test("0")) {
                return this.lexZero(pos);
            }
            if (this.test('"')) {
                return this.lexString(pos);
            }
            if (this.test(Lexer.punctuation)) {
                return this.lexPunctuation(pos);
            }
            if (this.test("/")) {
                this.step();
                if (this.test("/")) {
                    // Line comment: drop everything up to (not including) the
                    // newline, which is then skipped as whitespace.
                    while (!this.done() && !this.test("\n")) {
                        this.step();
                    }
                    continue;
                }
                return this.token("/", pos);
            }

            this.report(`illegal character '${this.current()}'`, pos);
            this.step();
        }
        return null;
    }

    /** Position of the next unread character. */
    public currentPos(): Pos {
        return this.pos();
    }

    /** Lexes an identifier or keyword starting at the current character. */
    private lexWord(pos: Pos): Token {
        let value = "";
        while (this.test(Lexer.identPart)) {
            value += this.current();
            this.step();
        }
        if (Lexer.keywords.has(value)) {
            return this.token(value, pos);
        }
        return { ...this.token("ident", pos), identValue: value };
    }

    /** Lexes a decimal integer whose first digit is 1-9. */
    private lexInt(pos: Pos): Token {
        let textValue = "";
        while (this.test(Lexer.digit)) {
            textValue += this.current();
            this.step();
        }
        return { ...this.token("int", pos), intValue: parseInt(textValue, 10) };
    }

    /**
     * Lexes the literal `0`. A `0` directly followed by another digit is
     * reported as "invalid number" and yields an `"error"` token covering only
     * the `0`.
     */
    private lexZero(pos: Pos): Token {
        this.step();
        if (this.test(Lexer.digit)) {
            this.report("invalid number", pos);
            return this.token("error", pos);
        }
        return { ...this.token("int", pos), intValue: 0 };
    }

    /**
     * Lexes a double-quoted string. `\n`, `\t` and `\0` are translated; any
     * other escaped character stands for itself. Reaching the end of the text
     * before the closing quote is reported and yields an `"error"` token.
     */
    private lexString(pos: Pos): Token {
        this.step(); // opening quote
        let value = "";
        while (!this.done() && !this.test('"')) {
            if (this.test("\\")) {
                this.step();
                if (this.done()) {
                    break;
                }
                const escaped = this.current();
                value += Lexer.escapes.get(escaped) ?? escaped;
            } else {
                value += this.current();
            }
            this.step();
        }
        // The loop only exits at end of input or on the closing quote.
        if (this.done()) {
            this.report("unclosed/malformed string", pos);
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("string", pos), stringValue: value };
    }

    /**
     * Lexes a punctuation token, preferring a two-character operator when the
     * current and following characters form one.
     */
    private lexPunctuation(pos: Pos): Token {
        const first = this.current();
        this.step();
        if (!this.done()) {
            const pair = first + this.current();
            if (Lexer.twoCharOperators.has(pair)) {
                this.step();
                return this.token(pair, pos);
            }
        }
        return this.token(first, pos);
    }

    /** True once every character has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }

    /** The unread character at the cursor, or `""` at end of input. */
    private current(): string {
        return this.text.charAt(this.index);
    }

    /** Consumes one character, keeping `line` and `col` in sync. */
    private step() {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line += 1;
            this.col = 1;
        } else {
            this.col += 1;
        }
        this.index += 1;
    }

    /** Snapshot of the cursor as a `Pos`. */
    private pos(): Pos {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** Builds a token spanning from `pos` up to the current cursor. */
    private token(type: string, pos: Pos): Token {
        const length = this.index - pos.index;
        return { type, pos, length };
    }

    /**
     * Checks the current character against a single-character string or a
     * regular expression. Always false at end of input.
     */
    private test(pattern: RegExp | string): boolean {
        if (this.done()) {
            return false;
        }
        const ch = this.current();
        return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
    }

    /** Forwards a lexical error to the reporter. */
    private report(msg: string, pos: Pos) {
        this.reporter.reportError({
            msg,
            pos,
            reporter: "Lexer",
        });
    }
}