import { Reporter } from "./info.ts";
import { Pos, Token } from "./token.ts";

/**
 * Converts source text into a stream of tokens, one per call to `next()`.
 *
 * Token `type` strings produced here:
 * - a keyword's own spelling (e.g. `"let"`, `"fn"`),
 * - `"ident"` (with `identValue`),
 * - `"int"` (with `intValue`) for decimal and character literals,
 * - `"string"` (with `stringValue`),
 * - the punctuation/operator spelling itself (e.g. `"+"`, `"->"`, `"::<"`),
 * - `"error"` after a problem has been reported to the `Reporter`.
 *
 * Note: `int` is also a keyword, so the keyword and integer literals share the
 * token type `"int"`; only literals carry `intValue`.
 */
export class Lexer {
    /** Words that lex as their own token type instead of `"ident"`. */
    private static readonly keywords: ReadonlySet<string> = new Set([
        "false",
        "true",
        "null",
        "int",
        "bool",
        "string",
        "break",
        "return",
        "let",
        "fn",
        "loop",
        "if",
        "else",
        "struct",
        "or",
        "and",
        "not",
        "while",
        "for",
        "in",
        "mod",
        "pub",
        "use",
        "type_alias",
    ]);

    /** Escape letters with a special meaning; any other escaped char is itself. */
    private static readonly escapes: ReadonlyMap<string, string> = new Map([
        ["n", "\n"],
        ["t", "\t"],
        ["0", "\0"],
    ]);

    /** First characters that can be followed by `=` to form a two-char operator. */
    private static readonly equalsPrefixes: ReadonlySet<string> = new Set([
        "=",
        "<",
        ">",
        "!",
        "+",
        "-",
    ]);

    private static readonly whitespace = /[ \t\n\r]/;
    private static readonly identStart = /[a-zA-Z_]/;
    private static readonly identPart = /[a-zA-Z0-9_]/;
    private static readonly nonZeroDigit = /[1-9]/;
    private static readonly digit = /[0-9]/;
    // NOTE(review): the original character class was truncated in the source
    // (text between `<` and `>` was lost). `<` and `!` are restored because the
    // surviving code emits "::<" and "!="; confirm no other characters
    // belonged here.
    private static readonly punctuation = /[+{};=\-*().,:[\]><!]/;

    private index = 0;
    private line = 1;
    private col = 1;

    public constructor(private text: string, private reporter: Reporter) {}

    /**
     * Lexes and returns the next token.
     *
     * Whitespace, `//` line comments and illegal characters (which are
     * reported) are skipped in a loop rather than by recursion, so arbitrarily
     * long runs of them cannot overflow the call stack.
     *
     * @returns the next token, or `null` once the input is exhausted.
     */
    public next(): Token | null {
        while (!this.done()) {
            const pos = this.pos();
            if (this.test(Lexer.whitespace)) {
                this.takeWhile(Lexer.whitespace);
                continue;
            }
            if (this.test(Lexer.identStart)) {
                return this.lexWord(pos);
            }
            if (this.test(Lexer.nonZeroDigit)) {
                return this.lexNumber(pos);
            }
            if (this.test("0")) {
                return this.lexZero(pos);
            }
            if (this.test("'")) {
                return this.lexChar(pos);
            }
            if (this.test('"')) {
                return this.lexString(pos);
            }
            if (this.test(Lexer.punctuation)) {
                return this.lexPunctuation(pos);
            }
            if (this.test("/")) {
                this.step();
                if (this.test("/")) {
                    // Line comment: drop everything up to (not including) the newline.
                    while (!this.done() && !this.test("\n")) {
                        this.step();
                    }
                    continue;
                }
                return this.token("/", pos);
            }
            this.report(`illegal character '${this.current()}'`, pos);
            this.step();
        }
        return null;
    }

    /** Returns the position of the next unread character. */
    public currentPos(): Pos {
        return this.pos();
    }

    /** Lexes an identifier or keyword starting at the current character. */
    private lexWord(pos: Pos): Token {
        const value = this.takeWhile(Lexer.identPart);
        if (Lexer.keywords.has(value)) {
            return this.token(value, pos);
        }
        return { ...this.token("ident", pos), identValue: value };
    }

    /** Lexes a decimal integer literal whose first digit is 1-9. */
    private lexNumber(pos: Pos): Token {
        const digits = this.takeWhile(Lexer.digit);
        // Explicit radix: the literal is always decimal.
        return { ...this.token("int", pos), intValue: parseInt(digits, 10) };
    }

    /** Lexes the literal `0`; a digit directly after it is reported as invalid. */
    private lexZero(pos: Pos): Token {
        this.step();
        if (this.test(Lexer.digit)) {
            this.report("invalid number", pos);
            return this.token("error", pos);
        }
        return { ...this.token("int", pos), intValue: 0 };
    }

    /**
     * Lexes a character literal such as `'a'` or `'\n'` into an `"int"` token
     * holding the UTF-16 code unit of the character.
     */
    private lexChar(pos: Pos): Token {
        this.step(); // opening quote
        let value: string;
        if (this.test("\\")) {
            this.step();
            if (this.done()) {
                return this.malformedChar(pos);
            }
            value = this.unescape(this.current());
        } else {
            if (this.done()) {
                return this.malformedChar(pos);
            }
            value = this.current();
        }
        this.step();
        if (!this.test("'")) {
            return this.malformedChar(pos);
        }
        this.step(); // closing quote
        return { ...this.token("int", pos), intValue: value.charCodeAt(0) };
    }

    /** Reports a malformed character literal and yields an `"error"` token. */
    private malformedChar(pos: Pos): Token {
        this.report("malformed character literal", pos);
        return this.token("error", pos);
    }

    /** Lexes a double-quoted string literal, resolving backslash escapes. */
    private lexString(pos: Pos): Token {
        this.step(); // opening quote
        let value = "";
        while (!this.done() && !this.test('"')) {
            if (this.test("\\")) {
                this.step();
                if (this.done()) {
                    break;
                }
                value += this.unescape(this.current());
            } else {
                value += this.current();
            }
            this.step();
        }
        if (!this.test('"')) {
            this.report("unclosed/malformed string", pos);
            return this.token("error", pos);
        }
        this.step(); // closing quote
        return { ...this.token("string", pos), stringValue: value };
    }

    /**
     * Lexes punctuation and operators, preferring the longest match:
     * `==`, `<=`, `>=`, `!=`, `+=`, `-=`, `->`, `::`, `::<`, otherwise the
     * single character.
     *
     * NOTE(review): `==` and `<=` were reconstructed by symmetry with the
     * surviving `>=`/`!=`/`+=`/`-=` handling; the original text for them was
     * lost. Confirm against the parser's expected token set.
     */
    private lexPunctuation(pos: Pos): Token {
        const first = this.current();
        this.step();
        if (Lexer.equalsPrefixes.has(first) && this.test("=")) {
            this.step();
            return this.token(`${first}=`, pos);
        }
        if (first === "-" && this.test(">")) {
            this.step();
            return this.token("->", pos);
        }
        if (first === ":" && this.test(":")) {
            this.step();
            if (this.test("<")) {
                this.step();
                return this.token("::<", pos);
            }
            return this.token("::", pos);
        }
        return this.token(first, pos);
    }

    /** Maps the character after a backslash to the character it denotes. */
    private unescape(ch: string): string {
        return Lexer.escapes.get(ch) ?? ch;
    }

    /** Consumes characters while they match `pattern`; returns the consumed text. */
    private takeWhile(pattern: RegExp): string {
        const start = this.index;
        while (this.test(pattern)) {
            this.step();
        }
        return this.text.slice(start, this.index);
    }

    /** True once every character of the input has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }

    /** The next unread character (empty string at end of input). */
    private current(): string {
        return this.text.charAt(this.index);
    }

    /** Advances one character, keeping `line`/`col` in sync; no-op at end of input. */
    private step() {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line += 1;
            this.col = 1;
        } else {
            this.col += 1;
        }
        this.index += 1;
    }

    /** Snapshot of the current index, line and column. */
    private pos(): Pos {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** Builds a token that starts at `pos` and ends at the current index. */
    private token(type: string, pos: Pos): Token {
        const length = this.index - pos.index;
        return { type, pos, length };
    }

    /**
     * Checks the next unread character against a literal or a pattern.
     * Always false at end of input, so a RegExp is never matched against a
     * missing character.
     */
    private test(pattern: RegExp | string): boolean {
        if (this.done()) {
            return false;
        }
        const ch = this.current();
        return typeof pattern === "string" ? ch === pattern : pattern.test(ch);
    }

    /** Forwards a lexer error to the reporter. */
    private report(msg: string, pos: Pos) {
        this.reporter.reportError({
            msg,
            pos,
            reporter: "Lexer",
        });
    }
}