import type { Position, StaticTokenType, Token, TokenIter } from "./token.ts";

/**
 * Hand-written lexer that turns source text into tokens, one per call to
 * {@link Lexer.next}.
 *
 * Positions carry a 0-based `index` into the text plus a 1-based `line` and
 * `col`. Once the input is exhausted every further call to `next()` returns
 * an `"eof"` token.
 */
export class Lexer implements TokenIter {
    private index = 0;
    private line = 1;
    private col = 1;

    public constructor(private readonly text: string) {}

    /**
     * Skips whitespace and returns the next token.
     *
     * @returns An `"eof"` token at end of input; an `"id"` token (with
     * `value: string`) or a keyword token for words; an `"int"` token (with
     * `value: number`) for decimal literals; a static token for punctuation
     * and operators; or an `"invalid"` token for any character that starts
     * none of the above (exactly one character is consumed in that case).
     */
    public next(): Token {
        this.skipWhitespace();
        const pos = this.position();
        if (this.done()) {
            return { pos, tokenType: "eof" };
        }

        const ch = this.current();
        if (Lexer.idStartChars.includes(ch)) {
            return this.lexIdOrKeyword(pos);
        }
        if (ch === "0") {
            // A lone zero is a complete literal. Multi-digit literals may not
            // start with "0", so following digits begin a separate token.
            this.step();
            return { pos, tokenType: "int", value: 0 };
        }
        if (Lexer.intStartChars.includes(ch)) {
            return this.lexInt(pos);
        }

        const single = Lexer.singleStaticTokenTypes.get(ch);
        if (single !== undefined) {
            this.step();
            return { pos, tokenType: single };
        }

        const plain = Lexer.operatorTokenTypes.get(ch);
        if (plain !== undefined) {
            this.step();
            // Prefer the two-character operator when the next character
            // completes one (e.g. "+" followed by "=" is "plusequal").
            const compound = this.done()
                ? undefined
                : Lexer.compoundOperatorTokenTypes.get(ch + this.current());
            if (compound !== undefined) {
                this.step();
                return { pos, tokenType: compound };
            }
            return { pos, tokenType: plain };
        }

        this.step();
        return { pos, tokenType: "invalid" };
    }

    private static readonly whitespaceChars = " \t\r\n";
    private static readonly intStartChars = "123456789";
    private static readonly intChars = "1234567890";
    private static readonly idStartChars =
        "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ_";
    private static readonly idChars = Lexer.idStartChars + Lexer.intChars;

    /** Punctuation that is always a token on its own. */
    private static readonly singleStaticTokenTypes: ReadonlyMap<
        string,
        StaticTokenType
    > = new Map<string, StaticTokenType>([
        ["(", "lparen"],
        [")", "rparen"],
        ["{", "lbrace"],
        ["}", "rbrace"],
        ["[", "lbracket"],
        ["]", "rbracket"],
        [".", "dot"],
        [",", "comma"],
        [":", "colon"],
        [";", "semicolon"],
        ["~", "tilde"],
        ["*", "asterisk"],
    ]);

    /** Operator characters that may also begin a two-character operator. */
    private static readonly operatorTokenTypes: ReadonlyMap<
        string,
        StaticTokenType
    > = new Map<string, StaticTokenType>([
        ["+", "plus"],
        ["-", "minus"],
        ["&", "ampersand"],
        ["|", "pipe"],
        ["^", "hat"],
        ["=", "equal"],
        ["!", "exclamation"],
        ["<", "lt"],
        [">", "gt"],
    ]);

    /** Two-character operators, keyed by their full spelling. */
    private static readonly compoundOperatorTokenTypes: ReadonlyMap<
        string,
        StaticTokenType
    > = new Map<string, StaticTokenType>([
        ["++", "plusplus"],
        ["+=", "plusequal"],
        ["--", "minusminus"],
        ["->", "minusgt"],
        ["-=", "minusequal"],
        ["&=", "ampersandequal"],
        ["|=", "pipeequal"],
        ["^=", "hatequal"],
        ["==", "equalequal"],
        ["!=", "exclamationequal"],
        ["<=", "ltequal"],
        [">=", "gtequal"],
    ]);

    /**
     * Reserved words. Stored in a Map rather than a plain object so that
     * identifiers such as `toString` or `constructor` are not mistaken for
     * keywords through the object prototype chain.
     */
    private static readonly keywordTokenTypes: ReadonlyMap<
        string,
        StaticTokenType
    > = new Map<string, StaticTokenType>([
        ["let", "let"],
        ["mut", "mut"],
        ["not", "not"],
        ["and", "and"],
        ["or", "or"],
        ["fn", "fn"],
        ["return", "return"],
        ["if", "if"],
        ["else", "else"],
        ["loop", "loop"],
        ["while", "while"],
        ["break", "break"],
        ["continue", "continue"],
    ]);

    /** Advances past any run of spaces, tabs, carriage returns and newlines. */
    private skipWhitespace(): void {
        while (this.currentIn(Lexer.whitespaceChars)) {
            this.step();
        }
    }

    /**
     * Lexes a word starting at the current character and classifies it as a
     * keyword or an identifier.
     *
     * @param pos Position of the first character of the word.
     */
    private lexIdOrKeyword(pos: Position): Token {
        const start = this.index;
        this.step();
        while (this.currentIn(Lexer.idChars)) {
            this.step();
        }
        const valueString = this.text.slice(start, this.index);
        const keyword = Lexer.keywordTokenTypes.get(valueString);
        if (keyword !== undefined) {
            return { pos, tokenType: keyword };
        }
        return { pos, tokenType: "id", value: valueString };
    }

    /**
     * Lexes a decimal integer literal whose first digit is non-zero.
     *
     * @param pos Position of the first digit.
     */
    private lexInt(pos: Position): Token {
        const start = this.index;
        this.step();
        while (this.currentIn(Lexer.intChars)) {
            this.step();
        }
        const valueString = this.text.slice(start, this.index);
        return { pos, tokenType: "int", value: parseInt(valueString, 10) };
    }

    /**
     * Consumes the current character and updates line/column bookkeeping.
     * The line counter advances when a newline is consumed, so the newline
     * itself belongs to the line it terminates and the character after it is
     * at column 1 of the next line. Does nothing at end of input.
     */
    private step(): void {
        if (this.done()) {
            return;
        }
        if (this.current() === "\n") {
            this.line++;
            this.col = 1;
        } else {
            this.col++;
        }
        this.index++;
    }

    /** Snapshot of the current position. */
    private position(): Position {
        return {
            index: this.index,
            line: this.line,
            col: this.col,
        };
    }

    /** True when input remains and the current character occurs in `chars`. */
    private currentIn(chars: string): boolean {
        // The done() guard matters: current() yields "" at end of input and
        // every string "includes" the empty string.
        return !this.done() && chars.includes(this.current());
    }

    /** The character at the current index, or "" at end of input. */
    private current(): string {
        return this.text.charAt(this.index);
    }

    /** True once every character of the text has been consumed. */
    private done(): boolean {
        return this.index >= this.text.length;
    }
}